From 70800eead8c4678a258615408a91f6323a658c25 Mon Sep 17 00:00:00 2001 From: Hattori Keigo Date: Tue, 16 Oct 2018 12:56:25 +0900 Subject: [PATCH 1/9] Pipnize --- .gitmodules | 4 + __init__.py | 0 core/__init__.py | 0 core/predict_interface.py | 64 ----------- drucker/__init__.py | 20 ++++ .../drucker_dashboard_servicer.py | 41 ++++--- drucker/drucker_worker.py | 81 ++++++++++++++ {core => drucker}/drucker_worker_servicer.py | 66 ++++++------ drucker/grpc | 1 + drucker/logger/__init__.py | 9 ++ {logger => drucker/logger}/logger_fluent.py | 61 +++++++---- .../logger}/logger_interface.py | 0 .../logger}/logger_jsonlogger.py | 52 ++++++--- drucker/models/__init__.py | 5 + drucker/models/dao.py | 76 +++++++++++++ .../models}/model_assignment.py | 2 +- drucker/protobuf | 1 + drucker/utils/__init__.py | 99 +++++++++++++++++ models/__init__.py | 101 ------------------ setup.py | 75 +++++++++++++ template/{predict.py => app.py} | 26 ++--- test-requirements.txt | 17 +++ utils/env_loader.py | 53 --------- 23 files changed, 525 insertions(+), 329 deletions(-) create mode 100644 .gitmodules delete mode 100644 __init__.py delete mode 100644 core/__init__.py delete mode 100644 core/predict_interface.py create mode 100644 drucker/__init__.py rename {core => drucker}/drucker_dashboard_servicer.py (81%) create mode 100644 drucker/drucker_worker.py rename {core => drucker}/drucker_worker_servicer.py (82%) create mode 160000 drucker/grpc create mode 100644 drucker/logger/__init__.py rename {logger => drucker/logger}/logger_fluent.py (65%) rename {logger => drucker/logger}/logger_interface.py (100%) rename {logger => drucker/logger}/logger_jsonlogger.py (79%) create mode 100644 drucker/models/__init__.py create mode 100644 drucker/models/dao.py rename {models => drucker/models}/model_assignment.py (96%) create mode 120000 drucker/protobuf create mode 100644 drucker/utils/__init__.py delete mode 100644 models/__init__.py create mode 100644 setup.py rename template/{predict.py => app.py} (83%) create mode 100644 test-requirements.txt delete mode 100644 utils/env_loader.py diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..76e4f11 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,4 @@ +[submodule "drucker/grpc"] + path = drucker/grpc + url = https://github.com/drucker/drucker-grpc-proto.git + branch = master diff --git a/__init__.py b/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/core/__init__.py b/core/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/core/predict_interface.py b/core/predict_interface.py deleted file mode 100644 index 3be9a6f..0000000 --- a/core/predict_interface.py +++ /dev/null @@ -1,64 +0,0 @@ -#!/usr/bin/python -# -*- coding: utf-8 -*- - -import json - -from abc import ABCMeta, abstractmethod -from enum import Enum -from typing import Union, List - -PredictLabel = Union[str, bytes, List[str], List[int], List[float]] -PredictScore = Union[float, List[float]] - - -class PredictResult: - def __init__(self, label: PredictLabel, score: PredictScore, option: dict = None): - self.label = label - self.score = score - self.option = json.dumps(option) if option is not None else '{}' - - -class EvaluateResult: - def __init__(self, num: int = None, accuracy: float = None, - precision: list = None, recall: list = None, - fvalue: list = None): - if num is None: - self.num = 0 - self.accuracy = 0.0 - self.precision = [0] - self.recall = [0] - self.fvalue = [0] - else: - self.num = num - self.accuracy = accuracy - self.precision = precision - self.recall = recall - self.fvalue = fvalue - - -class PredictInterface(metaclass=ABCMeta): - def __init__(self): - self.type_input = None - self.type_output = None - - def set_type(self, type_input: Enum, type_output: Enum) -> None: - self.type_input = type_input - self.type_output = type_output - - def get_type_input(self) -> Enum: - return self.type_input - - def get_type_output(self) -> Enum: - return self.type_output - - @abstractmethod - def load_model(self, model_path: str) -> None: - raise NotImplemented() - - @abstractmethod - def predict(self, input: PredictLabel, option: dict = None) -> PredictResult: - raise NotImplemented() - - @abstractmethod - def evaluate(self, file: bytes) -> EvaluateResult: - raise NotImplemented() diff --git a/drucker/__init__.py b/drucker/__init__.py new file mode 100644 index 0000000..1959f27 --- /dev/null +++ b/drucker/__init__.py @@ -0,0 +1,20 @@ +# Copyright 2018 The Drucker Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +__project__ = 'drucker' +__version__ = "0.4.0" + +from .drucker_worker import Drucker +from .drucker_worker_servicer import DruckerInput, DruckerOutput, DruckerWorkerServicer +from .drucker_dashboard_servicer import DruckerDashboardServicer diff --git a/core/drucker_dashboard_servicer.py b/drucker/drucker_dashboard_servicer.py similarity index 81% rename from core/drucker_dashboard_servicer.py rename to drucker/drucker_dashboard_servicer.py index 891cdc9..20df0d2 100644 --- a/core/drucker_dashboard_servicer.py +++ b/drucker/drucker_dashboard_servicer.py @@ -9,18 +9,12 @@ import uuid from pathlib import Path -import drucker_pb2 -import drucker_pb2_grpc - from grpc._server import _Context from typing import Iterator -from logger.logger_interface import SystemLoggerInterface -from core.predict_interface import PredictInterface -from utils.env_loader import SERVICE_NAME, SERVICE_LEVEL, APPLICATION_NAME, SERVICE_INFRA - -from models import db, get_model_path -from models.model_assignment import ModelAssignment +from .logger import SystemLoggerInterface +from .drucker_worker import Drucker, db, ModelAssignment +from .protobuf import drucker_pb2, drucker_pb2_grpc def error_handling(error_response): @@ -83,9 +77,9 @@ class DruckerDashboardServicer(drucker_pb2_grpc.DruckerDashboardServicer): Machine learning model """ - def __init__(self, logger: SystemLoggerInterface, predictor: PredictInterface): + def __init__(self, logger: SystemLoggerInterface, app: Drucker): self.logger = logger - self.predictor = predictor + self.app = app def on_error(self, error: Exception): """ Postprocessing on error @@ -106,9 +100,10 @@ def ServiceInfo(self, ) -> drucker_pb2.ServiceInfoResponse: """ Get service info. """ - return drucker_pb2.ServiceInfoResponse(application_name=APPLICATION_NAME, - service_name=SERVICE_NAME, - service_level=SERVICE_LEVEL) + return drucker_pb2.ServiceInfoResponse( + application_name=self.app.config.APPLICATION_NAME, + service_name=self.app.config.SERVICE_NAME, + service_level=self.app.config.SERVICE_LEVEL) @error_handling(drucker_pb2.ModelResponse(status=0, message='Error: Uploading model file.')) def UploadModel(self, @@ -118,7 +113,7 @@ def UploadModel(self, """ Upload your latest ML model. """ save_path = None - tmp_path = get_model_path(uuid.uuid4().hex) + tmp_path = self.app.get_model_path(uuid.uuid4().hex) Path(tmp_path).parent.mkdir(parents=True, exist_ok=True) with open(tmp_path, 'wb') as f: for request in request_iterator: @@ -126,7 +121,7 @@ def UploadModel(self, model_data = request.data f.write(model_data) f.close() - model_path = get_model_path(save_path) + model_path = self.app.get_model_path(save_path) Path(model_path).parent.mkdir(parents=True, exist_ok=True) shutil.move(tmp_path, model_path) return drucker_pb2.ModelResponse(status=1, @@ -139,17 +134,17 @@ def SwitchModel(self, ) -> drucker_pb2.ModelResponse: """ Switch your ML model to run. """ - model_assignment = db.session.query(ModelAssignment).filter(ModelAssignment.service_name == SERVICE_NAME).one() + model_assignment = self.app.db.session.query(ModelAssignment).filter(ModelAssignment.service_name == self.app.config.SERVICE_NAME).one() model_assignment.model_path = request.path model_assignment.first_boot = False - db.session.commit() - model_path = get_model_path() + self.app.db.session.commit() + model_path = self.app.get_model_path() # :TODO: Use enum for SERVICE_INFRA - if SERVICE_INFRA == "kubernetes": + if self.app.config.SERVICE_INFRA == "kubernetes": pass - elif SERVICE_INFRA == "default": - self.predictor.load_model(model_path) + elif self.app.config.SERVICE_INFRA == "default": + self.app.load_model(model_path) return drucker_pb2.ModelResponse(status=1, message='Success: Switching model file.') @@ -164,7 +159,7 @@ def EvaluateModel(self, try: for evaluateModelRequest in request_iterator: test_data = evaluateModelRequest.data - result = self.predictor.evaluate(test_data) + result = self.app.evaluate(test_data) return drucker_pb2.EvaluateModelResponse(num=result.num, accuracy=result.accuracy, precision=result.precision, diff --git a/drucker/drucker_worker.py b/drucker/drucker_worker.py new file mode 100644 index 0000000..d0a719d --- /dev/null +++ b/drucker/drucker_worker.py @@ -0,0 +1,81 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + + +from abc import ABCMeta, abstractmethod +from enum import Enum +from sqlalchemy.sql import exists + +from .utils import DruckerConfig, PredictLabel, PredictResult, EvaluateResult +from .logger import logger +from .models import db, ModelAssignment + + +class Drucker(metaclass=ABCMeta): + __type_input = None + __type_output = None + config = None + logger = None + db = None + predictor = None + model_path = None + + def __init__(self, config_file: str = None): + self.config = DruckerConfig(config_file) + self.logger = logger + self.db = db + self.logger.init_app(self.config) + self.db.init_app(self.config) + + if self.config.TEST_MODE: + self.db.ModelBase.metadata.drop_all(self.db.engine) + self.db.ModelBase.metadata.create_all(self.db.engine) + + if not self.db.session.query(exists().where(ModelAssignment.service_name == self.config.SERVICE_NAME)).scalar(): + model_assignment = ModelAssignment() + model_assignment.service_name = self.config.SERVICE_NAME + model_assignment.model_path = self.config.FILE_MODEL + model_assignment.first_boot = True + self.db.session.add(model_assignment) + self.db.session.commit() + + self.model_path = self.get_model_path() + + def is_first_boot(self) -> bool: + try: + model_assignment = self.db.session.query(ModelAssignment).filter(ModelAssignment.service_name == self.config.SERVICE_NAME).one() + return model_assignment.first_boot + except: + return True + + def get_model_path(self, model_path: str = None) -> str: + if model_path is None: + model_path = self.config.FILE_MODEL + result = self.db.session.query(ModelAssignment). \ + filter(ModelAssignment.service_name == self.config.SERVICE_NAME). \ + one_or_none() + if result is not None: + model_path = result.model_path + return "{0}/{1}/{2}".format(self.config.DIR_MODEL, self.config.APPLICATION_NAME, model_path) + + def set_type(self, type_input: Enum, type_output: Enum) -> None: + self.__type_input = type_input + self.__type_output = type_output + + def get_type_input(self) -> Enum: + return self.__type_input + + def get_type_output(self) -> Enum: + return self.__type_output + + @abstractmethod + def load_model(self) -> None: + raise NotImplemented() + + @abstractmethod + def predict(self, input: PredictLabel, option: dict = None) -> PredictResult: + raise NotImplemented() + + @abstractmethod + def evaluate(self, file: bytes) -> EvaluateResult: + raise NotImplemented() diff --git a/core/drucker_worker_servicer.py b/drucker/drucker_worker_servicer.py similarity index 82% rename from core/drucker_worker_servicer.py rename to drucker/drucker_worker_servicer.py index 38f3337..6626fb9 100644 --- a/core/drucker_worker_servicer.py +++ b/drucker/drucker_worker_servicer.py @@ -1,8 +1,6 @@ #!/usr/bin/python # -*- coding: utf-8 -*- -import drucker_pb2 -import drucker_pb2_grpc import json @@ -10,8 +8,10 @@ from grpc._server import _Context from typing import Iterator, Union -from logger.logger_interface import ServiceLoggerInterface -from core.predict_interface import PredictResult, PredictInterface +from .logger import ServiceLoggerInterface +from .drucker_worker import PredictResult, Drucker +from .protobuf import drucker_pb2, drucker_pb2_grpc + DruckerInput = Union[ drucker_pb2.StringInput, drucker_pb2.BytesInput, @@ -29,9 +29,9 @@ class Type(Enum): ARRAY_FLOAT = 4 ARRAY_STRING = 5 - def __init__(self, logger: ServiceLoggerInterface, predictor: PredictInterface): + def __init__(self, logger: ServiceLoggerInterface, app: Drucker): self.logger = logger - self.predictor = predictor + self.app = app def Process(self, request: DruckerInput, @@ -44,9 +44,9 @@ def Process(self, except: ioption = {request.option.val: request.option.val} - single_output = self.predictor.get_type_output() in [self.Type.STRING, self.Type.BYTES] + single_output = self.app.get_type_output() in [self.Type.STRING, self.Type.BYTES] try: - result = self.predictor.predict(input, ioption) + result = self.app.predict(input, ioption) except: if single_output: if isinstance(response, drucker_pb2.StringOutput): @@ -81,7 +81,7 @@ def Predict_String_String(self, context: _Context ) -> drucker_pb2.StringOutput: response = drucker_pb2.StringOutput() - self.predictor.set_type(self.Type.STRING, self.Type.STRING) + self.app.set_type(self.Type.STRING, self.Type.STRING) return self.Process(request, response) def Predict_String_Bytes(self, @@ -89,7 +89,7 @@ def Predict_String_Bytes(self, context: _Context ) -> drucker_pb2.BytesOutput: response = drucker_pb2.BytesOutput() - self.predictor.set_type(self.Type.STRING, self.Type.BYTES) + self.app.set_type(self.Type.STRING, self.Type.BYTES) yield self.Process(request, response) def Predict_String_ArrInt(self, @@ -97,7 +97,7 @@ def Predict_String_ArrInt(self, context: _Context ) -> drucker_pb2.ArrIntOutput: response = drucker_pb2.ArrIntOutput() - self.predictor.set_type(self.Type.STRING, self.Type.ARRAY_INT) + self.app.set_type(self.Type.STRING, self.Type.ARRAY_INT) return self.Process(request, response) def Predict_String_ArrFloat(self, @@ -105,7 +105,7 @@ def Predict_String_ArrFloat(self, context: _Context ) -> drucker_pb2.ArrFloatOutput: response = drucker_pb2.ArrFloatOutput() - self.predictor.set_type(self.Type.STRING, self.Type.ARRAY_FLOAT) + self.app.set_type(self.Type.STRING, self.Type.ARRAY_FLOAT) return self.Process(request, response) def Predict_String_ArrString(self, @@ -113,7 +113,7 @@ def Predict_String_ArrString(self, context: _Context ) -> drucker_pb2.ArrStringOutput: response = drucker_pb2.ArrStringOutput() - self.predictor.set_type(self.Type.STRING, self.Type.ARRAY_STRING) + self.app.set_type(self.Type.STRING, self.Type.ARRAY_STRING) return self.Process(request, response) def Predict_Bytes_String(self, @@ -122,7 +122,7 @@ def Predict_Bytes_String(self, ) -> drucker_pb2.StringOutput: for request in request_iterator: response = drucker_pb2.StringOutput() - self.predictor.set_type(self.Type.BYTES, self.Type.STRING) + self.app.set_type(self.Type.BYTES, self.Type.STRING) return self.Process(request, response) def Predict_Bytes_Bytes(self, @@ -131,7 +131,7 @@ def Predict_Bytes_Bytes(self, ) -> drucker_pb2.BytesOutput: for request in request_iterator: response = drucker_pb2.BytesOutput() - self.predictor.set_type(self.Type.BYTES, self.Type.BYTES) + self.app.set_type(self.Type.BYTES, self.Type.BYTES) yield self.Process(request, response) def Predict_Bytes_ArrInt(self, @@ -140,7 +140,7 @@ def Predict_Bytes_ArrInt(self, ) -> drucker_pb2.ArrIntOutput: for request in request_iterator: response = drucker_pb2.ArrIntOutput() - self.predictor.set_type(self.Type.BYTES, self.Type.ARRAY_INT) + self.app.set_type(self.Type.BYTES, self.Type.ARRAY_INT) return self.Process(request, response) def Predict_Bytes_ArrFloat(self, @@ -149,7 +149,7 @@ def Predict_Bytes_ArrFloat(self, ) -> drucker_pb2.ArrFloatOutput: for request in request_iterator: response = drucker_pb2.ArrFloatOutput() - self.predictor.set_type(self.Type.BYTES, self.Type.ARRAY_FLOAT) + self.app.set_type(self.Type.BYTES, self.Type.ARRAY_FLOAT) return self.Process(request, response) def Predict_Bytes_ArrString(self, @@ -158,7 +158,7 @@ def Predict_Bytes_ArrString(self, ) -> drucker_pb2.ArrStringOutput: for request in request_iterator: response = drucker_pb2.ArrStringOutput() - self.predictor.set_type(self.Type.BYTES, self.Type.ARRAY_STRING) + self.app.set_type(self.Type.BYTES, self.Type.ARRAY_STRING) return self.Process(request, response) def Predict_ArrInt_String(self, @@ -166,7 +166,7 @@ def Predict_ArrInt_String(self, context: _Context ) -> drucker_pb2.StringOutput: response = drucker_pb2.StringOutput() - self.predictor.set_type(self.Type.ARRAY_INT, self.Type.STRING) + self.app.set_type(self.Type.ARRAY_INT, self.Type.STRING) return self.Process(request, response) def Predict_ArrInt_Bytes(self, @@ -174,7 +174,7 @@ def Predict_ArrInt_Bytes(self, context: _Context ) -> drucker_pb2.BytesOutput: response = drucker_pb2.BytesOutput() - self.predictor.set_type(self.Type.ARRAY_INT, self.Type.BYTES) + self.app.set_type(self.Type.ARRAY_INT, self.Type.BYTES) yield self.Process(request, response) def Predict_ArrInt_ArrInt(self, @@ -182,7 +182,7 @@ def Predict_ArrInt_ArrInt(self, context: _Context ) -> drucker_pb2.ArrIntOutput: response = drucker_pb2.ArrIntOutput() - self.predictor.set_type(self.Type.ARRAY_INT, self.Type.ARRAY_INT) + self.app.set_type(self.Type.ARRAY_INT, self.Type.ARRAY_INT) return self.Process(request, response) def Predict_ArrInt_ArrFloat(self, @@ -190,7 +190,7 @@ def Predict_ArrInt_ArrFloat(self, context: _Context ) -> drucker_pb2.ArrFloatOutput: response = drucker_pb2.ArrFloatOutput() - self.predictor.set_type(self.Type.ARRAY_INT, self.Type.ARRAY_FLOAT) + self.app.set_type(self.Type.ARRAY_INT, self.Type.ARRAY_FLOAT) return self.Process(request, response) def Predict_ArrInt_ArrString(self, @@ -198,7 +198,7 @@ def Predict_ArrInt_ArrString(self, context: _Context ) -> drucker_pb2.ArrStringOutput: response = drucker_pb2.ArrStringOutput() - self.predictor.set_type(self.Type.ARRAY_INT, self.Type.ARRAY_STRING) + self.app.set_type(self.Type.ARRAY_INT, self.Type.ARRAY_STRING) return self.Process(request, response) def Predict_ArrFloat_String(self, @@ -206,7 +206,7 @@ def Predict_ArrFloat_String(self, context: _Context ) -> drucker_pb2.StringOutput: response = drucker_pb2.StringOutput() - self.predictor.set_type(self.Type.ARRAY_FLOAT, self.Type.STRING) + self.app.set_type(self.Type.ARRAY_FLOAT, self.Type.STRING) return self.Process(request, response) def Predict_ArrFloat_Bytes(self, @@ -214,7 +214,7 @@ def Predict_ArrFloat_Bytes(self, context: _Context ) -> drucker_pb2.BytesOutput: response = drucker_pb2.BytesOutput() - self.predictor.set_type(self.Type.ARRAY_FLOAT, self.Type.BYTES) + self.app.set_type(self.Type.ARRAY_FLOAT, self.Type.BYTES) yield self.Process(request, response) def Predict_ArrFloat_ArrInt(self, @@ -222,7 +222,7 @@ def Predict_ArrFloat_ArrInt(self, context: _Context ) -> drucker_pb2.ArrIntOutput: response = drucker_pb2.ArrIntOutput() - self.predictor.set_type(self.Type.ARRAY_FLOAT, self.Type.ARRAY_INT) + self.app.set_type(self.Type.ARRAY_FLOAT, self.Type.ARRAY_INT) return self.Process(request, response) def Predict_ArrFloat_ArrFloat(self, @@ -230,7 +230,7 @@ def Predict_ArrFloat_ArrFloat(self, context: _Context ) -> drucker_pb2.ArrFloatOutput: response = drucker_pb2.ArrFloatOutput() - self.predictor.set_type(self.Type.ARRAY_FLOAT, self.Type.ARRAY_FLOAT) + self.app.set_type(self.Type.ARRAY_FLOAT, self.Type.ARRAY_FLOAT) return self.Process(request, response) def Predict_ArrFloat_ArrString(self, @@ -238,7 +238,7 @@ def Predict_ArrFloat_ArrString(self, context: _Context ) -> drucker_pb2.ArrStringOutput: response = drucker_pb2.ArrStringOutput() - self.predictor.set_type(self.Type.ARRAY_FLOAT, self.Type.ARRAY_STRING) + self.app.set_type(self.Type.ARRAY_FLOAT, self.Type.ARRAY_STRING) return self.Process(request, response) def Predict_ArrString_String(self, @@ -246,7 +246,7 @@ def Predict_ArrString_String(self, context: _Context ) -> drucker_pb2.StringOutput: response = drucker_pb2.StringOutput() - self.predictor.set_type(self.Type.ARRAY_STRING, self.Type.STRING) + self.app.set_type(self.Type.ARRAY_STRING, self.Type.STRING) return self.Process(request, response) def Predict_ArrString_Bytes(self, @@ -254,7 +254,7 @@ def Predict_ArrString_Bytes(self, context: _Context ) -> drucker_pb2.BytesOutput: response = drucker_pb2.BytesOutput() - self.predictor.set_type(self.Type.ARRAY_STRING, self.Type.BYTES) + self.app.set_type(self.Type.ARRAY_STRING, self.Type.BYTES) yield self.Process(request, response) def Predict_ArrString_ArrInt(self, @@ -262,7 +262,7 @@ def Predict_ArrString_ArrInt(self, context: _Context ) -> drucker_pb2.ArrIntOutput: response = drucker_pb2.ArrIntOutput() - self.predictor.set_type(self.Type.ARRAY_STRING, self.Type.ARRAY_INT) + self.app.set_type(self.Type.ARRAY_STRING, self.Type.ARRAY_INT) return self.Process(request, response) def Predict_ArrString_ArrFloat(self, @@ -270,7 +270,7 @@ def Predict_ArrString_ArrFloat(self, context: _Context ) -> drucker_pb2.ArrFloatOutput: response = drucker_pb2.ArrFloatOutput() - self.predictor.set_type(self.Type.ARRAY_STRING, self.Type.ARRAY_FLOAT) + self.app.set_type(self.Type.ARRAY_STRING, self.Type.ARRAY_FLOAT) return self.Process(request, response) def Predict_ArrString_ArrString(self, @@ -278,5 +278,5 @@ def Predict_ArrString_ArrString(self, context: _Context ) -> drucker_pb2.ArrStringOutput: response = drucker_pb2.ArrStringOutput() - self.predictor.set_type(self.Type.ARRAY_STRING, self.Type.ARRAY_STRING) + self.app.set_type(self.Type.ARRAY_STRING, self.Type.ARRAY_STRING) return self.Process(request, response) diff --git a/drucker/grpc b/drucker/grpc new file mode 160000 index 0000000..032c5a2 --- /dev/null +++ b/drucker/grpc @@ -0,0 +1 @@ +Subproject commit 032c5a295cad51f48ded87624cf76eeb66c77228 diff --git a/drucker/logger/__init__.py b/drucker/logger/__init__.py new file mode 100644 index 0000000..abf58a0 --- /dev/null +++ b/drucker/logger/__init__.py @@ -0,0 +1,9 @@ +# -*- coding: utf-8 -*- + + +from .logger_interface import SystemLoggerInterface, ServiceLoggerInterface +from .logger_jsonlogger import JsonSystemLogger, JsonServiceLogger +from .logger_fluent import FluentSystemLogger, FluentServiceLogger + + +logger = JsonSystemLogger() diff --git a/logger/logger_fluent.py b/drucker/logger/logger_fluent.py similarity index 65% rename from logger/logger_fluent.py rename to drucker/logger/logger_fluent.py index d9ec8b7..57fccaa 100644 --- a/logger/logger_fluent.py +++ b/drucker/logger/logger_fluent.py @@ -8,26 +8,37 @@ from fluent import handler, sender -from logger.logger_interface import SystemLoggerInterface, ServiceLoggerInterface -from utils.env_loader import ServiceEnvType, SERVICE_LEVEL_ENUM, APPLICATION_NAME +from drucker.utils import DruckerConfig +from .logger_interface import SystemLoggerInterface, ServiceLoggerInterface -class SystemLogger(SystemLoggerInterface): +class FluentSystemLogger(SystemLoggerInterface): - def __init__(self, logger_name: str = 'drucker', - log_level: int = logging.NOTSET, app_name: str = APPLICATION_NAME, - app_env: ServiceEnvType = ServiceEnvType.DEVELOPMENT) -> None: + def __init__(self, config: DruckerConfig = None) -> None: """ - constructor - :param logger_name: - :param log_level: - :param app_name: - :param app_env: + Constructor + :param config: """ super().__init__() + self.config = config + logger_name = 'drucker' + log_level = logging.NOTSET + app_name = 'NONE' + app_env = 'NONE' self.log = logging.getLogger(logger_name) self.log.setLevel(log_level) - + self.log.addHandler(self.__init_fluent_handler(app_name, app_env, log_level)) + if config is not None: + self.init_app(config) + + def init_app(self, config: DruckerConfig): + self.config = config + app_name = config.APPLICATION_NAM + app_env = config.SERVICE_LEVEL_ENUM.value + log_level = logging.NOTSET + self.log.addHandler(self.__init_fluent_handler(app_name, app_env, log_level)) + + def __init_fluent_handler(self, app_name: str, app_env: str, log_level: int): custom_format = { 'host': gethostname(), 'short_message': '%(message)s', @@ -37,12 +48,11 @@ def __init__(self, logger_name: str = 'drucker', 'ml_service': app_name, 'service_level': app_env } - fluent_handler = handler.FluentHandler('drucker') formatter = handler.FluentRecordFormatter(custom_format) fluent_handler.setFormatter(formatter) fluent_handler.setLevel(log_level) - self.log.addHandler(fluent_handler) + return fluent_handler def exception(self, message: str) -> None: """ @@ -80,19 +90,27 @@ def warn(self, message: str) -> None: self.log.warning(message, extra={'loglevel': 4}) -class ServiceLogger(ServiceLoggerInterface): +class FluentServiceLogger(ServiceLoggerInterface): - def __init__(self, app_name: str = APPLICATION_NAME, - app_env: ServiceEnvType = ServiceEnvType.DEVELOPMENT): + def __init__(self, config: DruckerConfig = None): """ - constructor - :param app_name: - :param app_env: + Constructor + :param config: """ super().__init__() + self.config = config + app_name = 'NONE' + app_env = 'NONE' self.logger = sender.FluentSender('drucker_service') self.ml_service = app_name self.service_level = app_env + if config is not None: + self.init_app(config) + + def init_app(self, config: DruckerConfig): + self.config = config + self.ml_service = config.config.APPLICATION_NAME + self.service_level = config.config.SERVICE_LEVEL_ENUM.value def emit(self, request, response, suppress_log_inout: bool = False) -> None: """ @@ -123,7 +141,6 @@ def emit(self, request, response, suppress_log_inout: bool = False) -> None: }) except: try: - SystemLogger(logger_name="ServiceLogger", app_name=APPLICATION_NAME, - app_env=SERVICE_LEVEL_ENUM).exception("can't write log") + FluentSystemLogger(self.config).exception("can't write log") except: pass diff --git a/logger/logger_interface.py b/drucker/logger/logger_interface.py similarity index 100% rename from logger/logger_interface.py rename to drucker/logger/logger_interface.py diff --git a/logger/logger_jsonlogger.py b/drucker/logger/logger_jsonlogger.py similarity index 79% rename from logger/logger_jsonlogger.py rename to drucker/logger/logger_jsonlogger.py index c338c13..4aea53f 100644 --- a/logger/logger_jsonlogger.py +++ b/drucker/logger/logger_jsonlogger.py @@ -8,11 +8,11 @@ from pythonjsonlogger import jsonlogger -from logger.logger_interface import SystemLoggerInterface, ServiceLoggerInterface -from utils.env_loader import ServiceEnvType, SERVICE_LEVEL_ENUM, APPLICATION_NAME +from drucker.utils import DruckerConfig +from .logger_interface import SystemLoggerInterface, ServiceLoggerInterface -class SystemLogger(SystemLoggerInterface): +class JsonSystemLogger(SystemLoggerInterface): class JsonFormatter(jsonlogger.JsonFormatter): def parse(self): return [ @@ -31,17 +31,18 @@ def add_fields(self, log_record, record, message_dict): log_record['timestamp'] = int(time.time() * 1000) / 1000 log_record['service'] = 'drucker' - def __init__(self, logger_name: str = 'drucker', - log_level: int = logging.NOTSET, app_name: str = APPLICATION_NAME, - app_env: ServiceEnvType = ServiceEnvType.DEVELOPMENT) -> None: + def __init__(self, config: DruckerConfig = None) -> None: """ - constructor - :param logger_name: - :param log_level: - :param app_name: - :param app_env: + Constructor + :param config: """ super().__init__() + self.config = config + logger_name = 'drucker' + log_level = logging.NOTSET + app_name = 'NONE' + app_env = 'NONE' + self.log = logging.getLogger(logger_name) handler = logging.StreamHandler() formatter = self.JsonFormatter() @@ -50,6 +51,13 @@ def __init__(self, logger_name: str = 'drucker', self.log.setLevel(log_level) self.ml_service = app_name self.service_level = app_env + if config is not None: + self.init_app(config) + + def init_app(self, config: DruckerConfig): + self.config = config + self.ml_service = config.APPLICATION_NAME + self.service_level = config.SERVICE_LEVEL_ENUM.value def exception(self, message: str) -> None: """ @@ -98,7 +106,7 @@ def warn(self, message: str) -> None: 'service_level': self.service_level}) -class ServiceLogger(ServiceLoggerInterface): +class JsonServiceLogger(ServiceLoggerInterface): class JsonFormatter(jsonlogger.JsonFormatter): def parse(self): return [ @@ -119,12 +127,16 @@ def add_fields(self, log_record, record, message_dict): log_record['timestamp'] = int(time.time() * 1000) / 1000 log_record['service'] = 'drucker' - def __init__(self, app_name: str = APPLICATION_NAME, - app_env: ServiceEnvType = ServiceEnvType.DEVELOPMENT): + def __init__(self, config: DruckerConfig = None): """ - constructor + Constructor + :param config: """ super().__init__() + self.config = config + app_name = 'NONE' + app_env = 'NONE' + self.log = logging.getLogger("drucker.service") handler = logging.StreamHandler() formatter = self.JsonFormatter() @@ -133,6 +145,13 @@ def __init__(self, app_name: str = APPLICATION_NAME, self.log.setLevel(logging.DEBUG) self.ml_service = app_name self.service_level = app_env + if config is not None: + self.init_app(config) + + def init_app(self, config: DruckerConfig): + self.config = config + self.ml_service = config.APPLICATION_NAME + self.service_level = config.SERVICE_LEVEL_ENUM.value def emit(self, request, response, suppress_log_inout: bool = False) -> None: """ @@ -154,7 +173,6 @@ def emit(self, request, response, suppress_log_inout: bool = False) -> None: 'ml_output': ml_output}) except Exception: try: - SystemLogger(logger_name="ServiceLogger", app_name=APPLICATION_NAME, - app_env=SERVICE_LEVEL_ENUM).exception("can't write log") + JsonSystemLogger(self.config).exception("can't write log") except: pass diff --git a/drucker/models/__init__.py b/drucker/models/__init__.py new file mode 100644 index 0000000..d195b04 --- /dev/null +++ b/drucker/models/__init__.py @@ -0,0 +1,5 @@ +# coding: utf-8 + + +from .dao import db +from .model_assignment import ModelAssignment diff --git a/drucker/models/dao.py b/drucker/models/dao.py new file mode 100644 index 0000000..2dba883 --- /dev/null +++ b/drucker/models/dao.py @@ -0,0 +1,76 @@ +# coding: utf-8 + + +from sqlalchemy import create_engine +from sqlalchemy.ext.declarative import declarative_base +from sqlalchemy.orm import sessionmaker, scoped_session + +from drucker.utils import DruckerConfig + + +class DruckerDAO(object): + """ Data Access Object + + This implementation is inspired by Flask-SQLAlchemy's one. + """ + + def __init__(self, config: DruckerConfig = None): + """ + Constructor + :param config: + """ + self.config = config + self.engine = create_engine( + 'sqlite:///:memory:', + encoding='utf-8', + echo=True + ) + self.session = scoped_session( + sessionmaker( + autocommit=False, + autoflush=False, + bind=self.engine + ) + ) + self.ModelBase = declarative_base() + if config is not None: + self.init_app(config) + + def init_app(self, config: DruckerConfig): + self.config = config + self.engine = create_engine( + self.__db_url(config), + encoding='utf-8', + echo=True + ) + self.session = scoped_session( + sessionmaker( + autocommit=False, + autoflush=False, + bind=self.engine + ) + ) + + def __db_url(self, config: DruckerConfig): + """ Get full URL for DB + + :TODO: Make configuarable of template of URL and encoding + :TODO: Use Enum for DB_MODE + :TODO: Use an appropriate "Exception" + """ + if config.DB_MODE == "sqlite": + db_name = "drucker.test.sqlite3" if config.TEST_MODE else "drucker.sqlite3" + url = f'sqlite:///{db_name}' + elif config.DB_MODE == "mysql": + host = config.DB_MYSQL_HOST + port = config.DB_MYSQL_PORT + db_name = "test_"+config.DB_MYSQL_DBNAME if config.TEST_MODE else config.DB_MYSQL_DBNAME + user = config.DB_MYSQL_USER + password = config.DB_MYSQL_PASSWORD + url = f'mysql+pymysql://{user}:{password}@{host}:{port}/{db_name}?charset=utf8' + else: + raise Exception("Invalid DB_MODE.") + return url + + +db = DruckerDAO() diff --git a/models/model_assignment.py b/drucker/models/model_assignment.py similarity index 96% rename from models/model_assignment.py rename to drucker/models/model_assignment.py index 345f16d..4e50987 100644 --- a/models/model_assignment.py +++ b/drucker/models/model_assignment.py @@ -2,7 +2,7 @@ Column, String, Boolean, UniqueConstraint ) -from models import db +from .dao import db class ModelAssignment(db.ModelBase): diff --git a/drucker/protobuf b/drucker/protobuf new file mode 120000 index 0000000..2d2cebc --- /dev/null +++ b/drucker/protobuf @@ -0,0 +1 @@ +grpc/protobuf \ No newline at end of file diff --git a/drucker/utils/__init__.py b/drucker/utils/__init__.py new file mode 100644 index 0000000..49149ba --- /dev/null +++ b/drucker/utils/__init__.py @@ -0,0 +1,99 @@ +# coding: utf-8 + + +import os +import yaml +import json + +from enum import Enum +from typing import Union, List + + +PredictLabel = Union[str, bytes, List[str], List[int], List[float]] +PredictScore = Union[float, List[float]] + + +class DruckerConfig: + TEST_MODE = None + SERVICE_PORT = None + APPLICATION_NAME = None + SERVICE_NAME = None + SERVICE_LEVEL_ENUM = None + SERVICE_INFRA = None + DIR_MODEL = None + FILE_MODEL = None + DB_MODE = None + DB_MYSQL_HOST = None + DB_MYSQL_PORT = None + DB_MYSQL_DBNAME = None + DB_MYSQL_USER = None + DB_MYSQL_PASSWORD = None + + def __init__(self, config_file: str): + settings_yaml = os.getenv("DRUCKER_SETTINGS_YAML", config_file) + config = dict() + if settings_yaml is not None: + config = yaml.load(open(settings_yaml, 'r')) + self.TEST_MODE = os.getenv("DRUCKER_TEST_MODE", config.get("test", "False")) + self.SERVICE_PORT = os.getenv("DRUCKER_SERVICE_PORT", config.get("app.port", "5000")) + self.APPLICATION_NAME = os.getenv("DRUCKER_APPLICATION_NAME", config["app.name"]) + self.SERVICE_NAME = os.getenv("DRUCKER_SERVICE_NAME", config["app.service.name"]) + service_level = os.getenv("DRUCKER_SERVICE_LEVEL", config["app.service.level"]) + self.SERVICE_LEVEL_ENUM = ServiceEnvType.to_Enum(service_level) + self.SERVICE_INFRA = os.getenv("DRUCKER_SERVICE_INFRA", "default") + self.DIR_MODEL = os.getenv("DRUCKER_SERVICE_MODEL_DIR", config.get("app.modeldir", "./model")) + self.FILE_MODEL = os.getenv("DRUCKER_SERVICE_MODEL_FILE", config.get("app.modelfile", "default.model")) + self.DB_MODE = os.getenv('DRUCKER_DB_MODE', config.get('use.db', "sqlite")) + self.DB_MYSQL_HOST = os.getenv('DRUCKER_DB_MYSQL_HOST', config.get('db.mysql.host', "")) + self.DB_MYSQL_PORT = os.getenv('DRUCKER_DB_MYSQL_PORT', config.get('db.mysql.port', "")) + self.DB_MYSQL_DBNAME = os.getenv('DRUCKER_DB_MYSQL_DBNAME', config.get('db.mysql.dbname', "")) + self.DB_MYSQL_USER = os.getenv('DRUCKER_DB_MYSQL_USER', config.get('db.mysql.user', "")) + self.DB_MYSQL_PASSWORD = os.getenv('DRUCKER_DB_MYSQL_PASSWORD', config.get('db.mysql.password', "")) + + +class ServiceEnvType(Enum): + DEVELOPMENT = 'development' + BETA = 'beta' + STAGING = 'staging' + SANDBOX = 'sandbox' + PRODUCTION = 'production' + + @classmethod + def to_Enum(cls, istr: str): + if cls.DEVELOPMENT.value == istr: + return cls.DEVELOPMENT + elif cls.BETA.value == istr: + return cls.BETA + elif cls.STAGING.value == istr: + return cls.STAGING + elif cls.SANDBOX.value == istr: + return cls.SANDBOX + elif cls.PRODUCTION == istr: + return cls.PRODUCTION + else: + return None + + +class PredictResult: + def __init__(self, label: PredictLabel, score: PredictScore, option: dict = None): + self.label = label + self.score = score + self.option = json.dumps(option) if option is not None else '{}' + + +class EvaluateResult: + def __init__(self, num: int = None, accuracy: float = None, + precision: list = None, recall: list = None, + fvalue: list = None): + if num is None: + self.num = 0 + self.accuracy = 0.0 + self.precision = [0] + self.recall = [0] + self.fvalue = [0] + else: + self.num = num + self.accuracy = accuracy + self.precision = precision + self.recall = recall + self.fvalue = fvalue diff --git a/models/__init__.py b/models/__init__.py deleted file mode 100644 index c5b3894..0000000 --- a/models/__init__.py +++ /dev/null @@ -1,101 +0,0 @@ -from sqlalchemy import create_engine -from sqlalchemy.ext.declarative import declarative_base -from sqlalchemy.orm import sessionmaker, scoped_session -from sqlalchemy.sql import exists -from utils.env_loader import ( - TEST_MODE, - DB_MODE, - DB_MYSQL_HOST, - DB_MYSQL_PORT, - DB_MYSQL_DBNAME, - DB_MYSQL_USER, - DB_MYSQL_PASSWORD, - SERVICE_NAME, - FILE_MODEL, - DIR_MODEL, - APPLICATION_NAME -) - - -def db_url(): - """ Get full URL for DB - - :TODO: Make configuarable of template of URL and encoding - :TODO: Use Enum for DB_MODE - :TODO: Use an appropriate "Exception" - """ - if DB_MODE == "sqlite": - db_name = "db.test.sqlite3" if TEST_MODE else "db.sqlite3" - url = f'sqlite:///{db_name}' - elif DB_MODE == "mysql": - host = DB_MYSQL_HOST - port = DB_MYSQL_PORT - db_name = "test_"+DB_MYSQL_DBNAME if TEST_MODE else DB_MYSQL_DBNAME - user = DB_MYSQL_USER - password = DB_MYSQL_PASSWORD - url = f'mysql+pymysql://{user}:{password}@{host}:{port}/{db_name}?charset=utf8' - else: - raise Exception("Invalid DB_MODE.") - return url - - -class DAO(object): - """ Data Access Object - - This implementation is inspired by Flask-SQLAlchemy's one. - """ - - def __init__(self): - self.engine = create_engine( - db_url(), - encoding='utf-8', - echo=True - ) - - self.session = scoped_session( - sessionmaker( - autocommit=False, - autoflush=False, - bind=self.engine - ) - ) - - self.ModelBase = declarative_base() - - -db = DAO() - -from models.model_assignment import ModelAssignment - -if TEST_MODE: - db.ModelBase.metadata.drop_all(db.engine) -db.ModelBase.metadata.create_all(db.engine) - -if not db.session.query(exists().where(ModelAssignment.service_name == SERVICE_NAME)).scalar(): - model_assignment = ModelAssignment() - model_assignment.service_name = SERVICE_NAME - model_assignment.model_path = FILE_MODEL - model_assignment.first_boot = True - db.session.add(model_assignment) - db.session.commit() - - -def get_model_path(model_path: str = None): - if model_path is None: - model_path = FILE_MODEL - - result = db.session.query(ModelAssignment). \ - filter(ModelAssignment.service_name == SERVICE_NAME). \ - one_or_none() - - if result is not None: - model_path = result.model_path - return "{0}/{1}/{2}".format(DIR_MODEL, APPLICATION_NAME, model_path) - - -SERVICE_FIRST_BOOT = True -try: - model_assignment = db.session.query(ModelAssignment).filter(ModelAssignment.service_name == SERVICE_NAME).one() - SERVICE_FIRST_BOOT = model_assignment.first_boot -except: - pass diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..343b64e --- /dev/null +++ b/setup.py @@ -0,0 +1,75 @@ +# Copyright 2018 The Drucker Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from setuptools import setup + +VERSION = "0.4.0" +PACKAGE_NAME = "drucker" +DEVELOPMENT_STATUS = "3 - Alpha" + +# To install the library, run the following +# +# python setup.py install +# +# prerequisite: setuptools +# http://pypi.python.org/pypi/setuptools + +EXTRAS = {} +REQUIRES = [] +with open('requirements.txt') as f: + for line in f: + line, _, _ = line.partition('#') + line = line.strip() + if ';' in line: + requirement, _, specifier = line.partition(';') + for_specifier = EXTRAS.setdefault(':{}'.format(specifier), []) + for_specifier.append(requirement) + else: + REQUIRES.append(line) + +with open('test-requirements.txt') as f: + TESTS_REQUIRES = f.readlines() + +with open("README.md", "r") as fh: + LONG_DESCRIPTION = fh.read() + +setup( + name=PACKAGE_NAME, + version=VERSION, + description="A Python gRPC framework for serving a machine learning module written in Python.", + author_email="", + author="Drucker", + license="Apache License Version 2.0", + url="https://github.com/drucker/drucker", + keywords=["Drucker", "Kubernetes"], + install_requires=REQUIRES, + tests_require=TESTS_REQUIRES, + extras_require=EXTRAS, + packages=['drucker', 'drucker.logger', 'drucker.utils', + 'drucker.models', 'drucker.protobuf'], + include_package_data=True, + long_description=LONG_DESCRIPTION, + classifiers=[ + "Development Status :: %s" % DEVELOPMENT_STATUS, + "Environment :: Web Environment", + "Topic :: Software Development :: Libraries :: Application Frameworks", + "Intended Audience :: Developers", + "Intended Audience :: Information Technology", + "License :: OSI Approved :: Apache Software License", + "Operating System :: OS Independent", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.6", + ], +) diff --git a/template/predict.py b/template/app.py similarity index 83% rename from template/predict.py rename to template/app.py index f917176..ecb7d88 100644 --- a/template/predict.py +++ b/template/app.py @@ -8,10 +8,9 @@ from enum import Enum -from drucker.logger.logger_jsonlogger import SystemLogger -from drucker.core.predict_interface import PredictInterface, PredictLabel, PredictResult, EvaluateResult -from drucker.utils.env_loader import SERVICE_LEVEL_ENUM, APPLICATION_NAME -from drucker.models import get_model_path, SERVICE_FIRST_BOOT +from drucker.logger import JsonSystemLogger +from drucker import Drucker +from drucker.utils import PredictLabel, PredictResult, EvaluateResult ### Expansion start. You can add your necessity libraries. import numpy as np @@ -28,12 +27,11 @@ def joblib_load_from_zip(zip_name: str, file_name: str): ### Expansion end. -class Predict(PredictInterface): - def __init__(self): - super().__init__() - self.predictor = None - self.logger = SystemLogger(logger_name="drucker_predict", app_name=APPLICATION_NAME, app_env=SERVICE_LEVEL_ENUM) - self.load_model(get_model_path()) +class Predict(Drucker): + def __init__(self, config_file: str = None): + super().__init__(config_file) + self.logger = JsonSystemLogger(self) + self.load_model() def set_type(self, type_input: Enum, type_output: Enum) -> None: super().set_type(type_input, type_output) @@ -44,18 +42,16 @@ def get_type_input(self) -> Enum: def get_type_output(self) -> Enum: return super().get_type_output() - def load_model(self, model_path: str = None) -> None: + def load_model(self) -> None: """ override Load ML model. - - :param model_path: :return: """ - assert model_path is not None, \ + assert self.model_path is not None, \ 'Please specify your ML model path' try: # FIXME: This is an example. Implement HERE! - self.predictor = joblib.load(model_path) + self.predictor = joblib.load(self.model_path) # FIXME: This is Another example. You can use archived file if your algorithm requires some files. # MODEL_NAME = "20180206" # zip_name = MODEL_HOME + MODEL_NAME + ".zip" diff --git a/test-requirements.txt b/test-requirements.txt new file mode 100644 index 0000000..795b816 --- /dev/null +++ b/test-requirements.txt @@ -0,0 +1,17 @@ +grpcio==1.13.0 +grpcio-tools==1.13.0 +python-json-logger==0.1.9 +fluent-logger==0.9.3 +Flask==1.0.2 +PyYAML==3.12 +SQLAlchemy==1.2.7 +flask-sqlalchemy==2.3.2 +kubernetes==v5.0.0 +Flask-Testing==0.7.1 +Flask-Cors==3.0.4 +flask-restplus==0.11.0 +PyJWT==1.6.4 +Flask-JWT-Simple==0.0.3 +python-ldap==3.1.0 +numpy==1.14.3 +PyMySQL==0.8.0 diff --git a/utils/env_loader.py b/utils/env_loader.py deleted file mode 100644 index f9577d1..0000000 --- a/utils/env_loader.py +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/python -# -*- coding: utf-8 -*- - -from enum import Enum -import yaml -import os - - -class ServiceEnvType(Enum): - DEVELOPMENT = 'development' - BETA = 'beta' - STAGING = 'staging' - SANDBOX = 'sandbox' - PRODUCTION = 'production' - - @classmethod - def to_Enum(cls, istr: str): - if cls.DEVELOPMENT.value == istr: - return cls.DEVELOPMENT - elif cls.BETA.value == istr: - return cls.BETA - elif cls.STAGING.value == istr: - return cls.STAGING - elif cls.SANDBOX.value == istr: - return cls.SANDBOX - elif cls.PRODUCTION == istr: - return cls.PRODUCTION - else: - return None - - -TEST_MODE = False if os.getenv("DRUCKER_TEST_MODE", None) is None else True - -SETTINGS_YAML = os.getenv("DRUCKER_SETTINGS_YAML", "settings.yml") -config = yaml.load(open(SETTINGS_YAML, 'r')) - -SERVICE_PORT = os.getenv("DRUCKER_SERVICE_PORT", config.get("app.port", "5000")) - -APPLICATION_NAME = os.getenv("DRUCKER_APPLICATION_NAME", config["app.name"]) -SERVICE_NAME = os.getenv("DRUCKER_SERVICE_NAME", config["app.service.name"]) -SERVICE_LEVEL = os.getenv("DRUCKER_SERVICE_LEVEL", config["app.service.level"]) -SERVICE_LEVEL_ENUM = ServiceEnvType.to_Enum(SERVICE_LEVEL) -SERVICE_INFRA = os.getenv("DRUCKER_SERVICE_INFRA", "default") - -DIR_MODEL = os.getenv("DRUCKER_SERVICE_MODEL_DIR", config.get("app.modeldir", "./model")) -FILE_MODEL = os.getenv("DRUCKER_SERVICE_MODEL_FILE", config.get("app.modelfile", "default.model")) - -DB_MODE = os.getenv('DRUCKER_DB_MODE', config.get('use.db', "sqlite")) -DB_MYSQL_HOST = os.getenv('DRUCKER_DB_MYSQL_HOST', config.get('db.mysql.host', "")) -DB_MYSQL_PORT = os.getenv('DRUCKER_DB_MYSQL_PORT', config.get('db.mysql.port', "")) -DB_MYSQL_DBNAME = os.getenv('DRUCKER_DB_MYSQL_DBNAME', config.get('db.mysql.dbname', "")) -DB_MYSQL_USER = os.getenv('DRUCKER_DB_MYSQL_USER', config.get('db.mysql.user', "")) -DB_MYSQL_PASSWORD = os.getenv('DRUCKER_DB_MYSQL_PASSWORD', config.get('db.mysql.password', "")) From 9d02a5995726bd72588b2a20dab02f8987a92def Mon Sep 17 00:00:00 2001 From: Hattori Keigo Date: Tue, 16 Oct 2018 14:10:45 +0900 Subject: [PATCH 2/9] Minor fix --- .gitignore | 4 ++-- drucker/grpc | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 26ac6fb..26d2188 100644 --- a/.gitignore +++ b/.gitignore @@ -111,8 +111,8 @@ drucker_pb2.py drucker_pb2_grpc.py # sqlite -db.sqlite3 -db.test.sqlite3 +drucker.sqlite3 +drucker.test.sqlite3 # Mac OS temporary file .DS_Store \ No newline at end of file diff --git a/drucker/grpc b/drucker/grpc index 032c5a2..26ac8fd 160000 --- a/drucker/grpc +++ b/drucker/grpc @@ -1 +1 @@ -Subproject commit 032c5a295cad51f48ded87624cf76eeb66c77228 +Subproject commit 26ac8fdd499b29e9652df377661c9de66374fc98 From 0be29d2d542c95ff1262de420d56eecc79b2b43d Mon Sep 17 00:00:00 2001 From: Hattori Keigo Date: Tue, 16 Oct 2018 14:59:57 +0900 Subject: [PATCH 3/9] Add unittest --- drucker/test/__init__.py | 69 +++++++++ drucker/test/dummy_app.py | 20 +++ drucker/test/test-settings.yml | 7 + drucker/test/test_worker_servicer.py | 218 +++++++++++++++++++++++++++ 4 files changed, 314 insertions(+) create mode 100644 drucker/test/__init__.py create mode 100644 drucker/test/dummy_app.py create mode 100644 drucker/test/test-settings.yml create mode 100644 drucker/test/test_worker_servicer.py diff --git a/drucker/test/__init__.py b/drucker/test/__init__.py new file mode 100644 index 0000000..dba9295 --- /dev/null +++ b/drucker/test/__init__.py @@ -0,0 +1,69 @@ +import os +import unittest +from functools import wraps +from unittest.mock import patch, Mock + +from drucker.protobuf import drucker_pb2 +from drucker.utils import PredictResult +from drucker.logger import JsonServiceLogger, JsonSystemLogger +from drucker.test.dummy_app import DummyApp +import drucker.drucker_worker_servicer + + +os.environ["DRUCKER_TEST_MODE"] = "True" +os.environ["DRUCKER_SETTINGS_YAML"] = "drucker/test/test-settings.yml" + +app = DummyApp() +service_logger = JsonServiceLogger(app.config) +system_logger = JsonSystemLogger(app.config) +Type = drucker.drucker_worker_servicer.DruckerWorkerServicer.Type + + +class DruckerWorkerTest(unittest.TestCase): + """DruckerWorkerTest is a base class for testing DruckerWorkerServicer. + This class create xxxOutput instance and check that the return values have correct type. + """ + + def assertStringResponse(self, response): + self.assertIsInstance(response, drucker_pb2.StringOutput) + + def assertBytesResponse(self, response): + for item in response: + self.assertIsInstance(item, drucker_pb2.BytesOutput) + + def assertArrIntResponse(self, response): + self.assertIsInstance(response, drucker_pb2.ArrIntOutput) + + def assertArrFloatResponse(self, response): + self.assertIsInstance(response, drucker_pb2.ArrFloatOutput) + + def assertArrStringResponse(self, response): + self.assertIsInstance(response, drucker_pb2.ArrStringOutput) + + +_prediction_value_map = { + Type.STRING: PredictResult('Drucker', 1.0, option={}), + Type.BYTES: PredictResult(b'\x8f\xfa;\xc8a\xa3T%', 1.0, option={}), + Type.ARRAY_INT: PredictResult([2, 3, 5, 7], [1.0, 1.0, 1.0, 1.0], option={}), + Type.ARRAY_FLOAT: PredictResult([0.78341155, 0.03166816, 0.92745938], [1.0, 1.0, 1.0], option={}), + Type.ARRAY_STRING: PredictResult(['Drucker', 'is', 'awesome'], [1.0, 1.0, 1.0], option={}), +} + + +def patch_predictor(input_type, output_type): + """Decorator to mock the predictor. + Patch the several methods of the Predict class to make a fake predictor. + """ + def test_method(func): + @wraps(func) + def inner_method(*args, **kwargs): + with patch('drucker.test.dummy_app.DummyApp.get_type_input', + new=Mock(return_value=input_type)) as _, \ + patch('drucker.test.dummy_app.DummyApp.get_type_output', + new=Mock(return_value=output_type)) as _, \ + patch('drucker.test.dummy_app.DummyApp.load_model') as _, \ + patch('drucker.test.dummy_app.DummyApp.predict', + new=Mock(return_value=_prediction_value_map[output_type])) as _: + return func(*args, **kwargs) + return inner_method + return test_method \ No newline at end of file diff --git a/drucker/test/dummy_app.py b/drucker/test/dummy_app.py new file mode 100644 index 0000000..3ada4d0 --- /dev/null +++ b/drucker/test/dummy_app.py @@ -0,0 +1,20 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + + +from drucker import Drucker +from drucker.utils import PredictLabel, PredictResult, EvaluateResult + + +class DummyApp(Drucker): + def __init__(self, config_file: str = None): + super().__init__(config_file) + + def load_model(self) -> None: + pass + + def predict(self, input: PredictLabel, option: dict = None) -> PredictResult: + pass + + def evaluate(self, file: bytes) -> EvaluateResult: + pass diff --git a/drucker/test/test-settings.yml b/drucker/test/test-settings.yml new file mode 100644 index 0000000..df99093 --- /dev/null +++ b/drucker/test/test-settings.yml @@ -0,0 +1,7 @@ +test: True +app.name: test +app.service.name: test-001 +app.service.level: development +app.modeldir: ./test-model +app.modelfile: default.model +use.db: sqlite diff --git a/drucker/test/test_worker_servicer.py b/drucker/test/test_worker_servicer.py new file mode 100644 index 0000000..1fdfdbd --- /dev/null +++ b/drucker/test/test_worker_servicer.py @@ -0,0 +1,218 @@ +from . import * + + +def _fake_string_request(): + request = drucker_pb2.StringInput() + request.input = 'Rekcurd' + request.option.val = '{}' + return request + + +def _fake_bytes_request(): + request = drucker_pb2.BytesInput() + request.input = b'\x9cT\xee\xca\x19\xbb\xa44\xfcS' + request.option.val = '{}' + return iter([request]) + + +def _fake_arrint_request(): + request = drucker_pb2.ArrIntInput() + request.input.extend([218, 81, 2, 215, 28]) + request.option.val = '{}' + return request + + +def _fake_arrfloat_request(): + request = drucker_pb2.ArrFloatInput() + request.input.extend([0.22861859, 0.90036856, 0.03665003, 0.69281863, 0.23225956]) + request.option.val = '{}' + return request + + +def _fake_arrstring_request(): + request = drucker_pb2.ArrStringInput() + request.input.extend(['Drucker', 'Docker', 'Rekcurd', 'Rekcod']) + request.option.val = '{}' + return request + + +class DruckerWorkerServicerTest(DruckerWorkerTest): + """Tests for DruckerWorkerServicer. + Initialize a servicer instance with patched predict class. + Call different methods of the servicer and check whether the return value has correct type. + """ + + @patch_predictor(Type.STRING, Type.STRING) + def test_String_String(self): + servicer = drucker.drucker_worker_servicer.DruckerWorkerServicer(logger=service_logger, app=app) + request = _fake_string_request() + response = servicer.Predict_String_String(request=request, context=None) + self.assertStringResponse(response) + + @patch_predictor(Type.STRING, Type.BYTES) + def test_String_Bytes(self): + servicer = drucker.drucker_worker_servicer.DruckerWorkerServicer(logger=service_logger, app=app) + request = _fake_string_request() + response = servicer.Predict_String_Bytes(request=request, context=None) + self.assertBytesResponse(response) + + @patch_predictor(Type.STRING, Type.ARRAY_INT) + def test_String_ArrInt(self): + servicer = drucker.drucker_worker_servicer.DruckerWorkerServicer(logger=service_logger, app=app) + request = _fake_string_request() + response = servicer.Predict_String_ArrInt(request=request, context=None) + self.assertArrIntResponse(response) + + @patch_predictor(Type.STRING, Type.ARRAY_FLOAT) + def test_String_ArrFloat(self): + servicer = drucker.drucker_worker_servicer.DruckerWorkerServicer(logger=service_logger, app=app) + request = _fake_string_request() + response = servicer.Predict_String_ArrFloat(request=request, context=None) + self.assertArrFloatResponse(response) + + @patch_predictor(Type.STRING, Type.ARRAY_STRING) + def test_String_ArrString(self): + servicer = drucker.drucker_worker_servicer.DruckerWorkerServicer(logger=service_logger, app=app) + request = _fake_string_request() + response = servicer.Predict_String_ArrString(request=request, context=None) + self.assertArrStringResponse(response) + + @patch_predictor(Type.BYTES, Type.STRING) + def test_Bytes_String(self): + servicer = drucker.drucker_worker_servicer.DruckerWorkerServicer(logger=service_logger, app=app) + request_iter = _fake_bytes_request() + response = servicer.Predict_Bytes_String(request_iterator=request_iter, context=None) + self.assertStringResponse(response) + + @patch_predictor(Type.BYTES, Type.BYTES) + def test_Bytes_Bytes(self): + servicer = drucker.drucker_worker_servicer.DruckerWorkerServicer(logger=service_logger, app=app) + request_iter = _fake_bytes_request() + response = servicer.Predict_Bytes_Bytes(request_iterator=request_iter, context=None) + self.assertBytesResponse(response) + + @patch_predictor(Type.BYTES, Type.ARRAY_INT) + def test_Bytes_ArrInt(self): + servicer = drucker.drucker_worker_servicer.DruckerWorkerServicer(logger=service_logger, app=app) + request_iter = _fake_bytes_request() + response = servicer.Predict_Bytes_ArrInt(request_iterator=request_iter, context=None) + self.assertArrIntResponse(response) + + @patch_predictor(Type.BYTES, Type.ARRAY_FLOAT) + def test_Bytes_ArrFloat(self): + servicer = drucker.drucker_worker_servicer.DruckerWorkerServicer(logger=service_logger, app=app) + request_iter = _fake_bytes_request() + response = servicer.Predict_Bytes_ArrFloat(request_iterator=request_iter, context=None) + self.assertArrFloatResponse(response) + + @patch_predictor(Type.BYTES, Type.ARRAY_STRING) + def test_Bytes_ArrString(self): + servicer = drucker.drucker_worker_servicer.DruckerWorkerServicer(logger=service_logger, app=app) + request_iter = _fake_bytes_request() + response = servicer.Predict_Bytes_ArrString(request_iterator=request_iter, context=None) + self.assertArrStringResponse(response) + + @patch_predictor(Type.ARRAY_INT, Type.STRING) + def test_ArrInt_String(self): + servicer = drucker.drucker_worker_servicer.DruckerWorkerServicer(logger=service_logger, app=app) + request = _fake_arrint_request() + response = servicer.Predict_ArrInt_String(request=request, context=None) + self.assertStringResponse(response) + + @patch_predictor(Type.ARRAY_INT, Type.BYTES) + def test_ArrInt_Bytes(self): + servicer = drucker.drucker_worker_servicer.DruckerWorkerServicer(logger=service_logger, app=app) + request = _fake_arrint_request() + response = servicer.Predict_ArrInt_Bytes(request=request, context=None) + self.assertBytesResponse(response) + + @patch_predictor(Type.ARRAY_INT, Type.ARRAY_INT) + def test_ArrInt_ArrInt(self): + servicer = drucker.drucker_worker_servicer.DruckerWorkerServicer(logger=service_logger, app=app) + request = _fake_arrint_request() + response = servicer.Predict_ArrInt_ArrInt(request=request, context=None) + self.assertArrIntResponse(response) + + @patch_predictor(Type.ARRAY_INT, Type.ARRAY_FLOAT) + def test_ArrInt_ArrFloat(self): + servicer = drucker.drucker_worker_servicer.DruckerWorkerServicer(logger=service_logger, app=app) + request = _fake_arrint_request() + response = servicer.Predict_ArrInt_ArrFloat(request=request, context=None) + self.assertArrFloatResponse(response) + + @patch_predictor(Type.ARRAY_INT, Type.ARRAY_STRING) + def test_ArrInt_ArrString(self): + servicer = drucker.drucker_worker_servicer.DruckerWorkerServicer(logger=service_logger, app=app) + request = _fake_arrint_request() + response = servicer.Predict_ArrInt_ArrString(request=request, context=None) + self.assertArrStringResponse(response) + + @patch_predictor(Type.ARRAY_FLOAT, Type.STRING) + def test_ArrFloat_String(self): + servicer = drucker.drucker_worker_servicer.DruckerWorkerServicer(logger=service_logger, app=app) + request = _fake_arrfloat_request() + response = servicer.Predict_ArrFloat_String(request=request, context=None) + self.assertStringResponse(response) + + @patch_predictor(Type.ARRAY_FLOAT, Type.BYTES) + def test_ArrFloat_Bytes(self): + servicer = drucker.drucker_worker_servicer.DruckerWorkerServicer(logger=service_logger, app=app) + request = _fake_arrfloat_request() + response = servicer.Predict_ArrFloat_Bytes(request=request, context=None) + self.assertBytesResponse(response) + + @patch_predictor(Type.ARRAY_FLOAT, Type.ARRAY_INT) + def test_ArrFloat_ArrInt(self): + servicer = drucker.drucker_worker_servicer.DruckerWorkerServicer(logger=service_logger, app=app) + request = _fake_arrfloat_request() + response = servicer.Predict_ArrFloat_ArrInt(request=request, context=None) + self.assertArrIntResponse(response) + + @patch_predictor(Type.ARRAY_FLOAT, Type.ARRAY_FLOAT) + def test_ArrFloat_ArrFloat(self): + servicer = drucker.drucker_worker_servicer.DruckerWorkerServicer(logger=service_logger, app=app) + request = _fake_arrfloat_request() + response = servicer.Predict_ArrFloat_ArrFloat(request=request, context=None) + self.assertArrFloatResponse(response) + + @patch_predictor(Type.ARRAY_FLOAT, Type.ARRAY_STRING) + def test_ArrFloat_ArrString(self): + servicer = drucker.drucker_worker_servicer.DruckerWorkerServicer(logger=service_logger, app=app) + request = _fake_arrfloat_request() + response = servicer.Predict_ArrFloat_ArrString(request=request, context=None) + self.assertArrStringResponse(response) + + @patch_predictor(Type.ARRAY_STRING, Type.STRING) + def test_ArrString_String(self): + servicer = drucker.drucker_worker_servicer.DruckerWorkerServicer(logger=service_logger, app=app) + request = _fake_arrstring_request() + response = servicer.Predict_ArrString_String(request=request, context=None) + self.assertStringResponse(response) + + @patch_predictor(Type.ARRAY_STRING, Type.BYTES) + def test_ArrString_Bytes(self): + servicer = drucker.drucker_worker_servicer.DruckerWorkerServicer(logger=service_logger, app=app) + request = _fake_arrstring_request() + response = servicer.Predict_ArrString_Bytes(request=request, context=None) + self.assertBytesResponse(response) + + @patch_predictor(Type.ARRAY_STRING, Type.ARRAY_INT) + def test_ArrString_ArrInt(self): + servicer = drucker.drucker_worker_servicer.DruckerWorkerServicer(logger=service_logger, app=app) + request = _fake_arrstring_request() + response = servicer.Predict_ArrString_ArrInt(request=request, context=None) + self.assertArrIntResponse(response) + + @patch_predictor(Type.ARRAY_STRING, Type.ARRAY_FLOAT) + def test_ArrString_ArrFloat(self): + servicer = drucker.drucker_worker_servicer.DruckerWorkerServicer(logger=service_logger, app=app) + request = _fake_arrstring_request() + response = servicer.Predict_ArrString_ArrFloat(request=request, context=None) + self.assertArrFloatResponse(response) + + @patch_predictor(Type.ARRAY_STRING, Type.ARRAY_STRING) + def test_ArrString_ArrString(self): + servicer = drucker.drucker_worker_servicer.DruckerWorkerServicer(logger=service_logger, app=app) + request = _fake_arrstring_request() + response = servicer.Predict_ArrString_ArrString(request=request, context=None) + self.assertArrStringResponse(response) From 132b01ec424360d5d2f03fab2a128f01a2046262 Mon Sep 17 00:00:00 2001 From: Hattori Keigo Date: Tue, 16 Oct 2018 16:29:28 +0900 Subject: [PATCH 4/9] Add test configuration --- .gitignore | 7 +------ .travis.yml | 23 +++++++++++++++++++++++ drucker/test/__init__.py | 2 +- test-requirements.txt | 22 +++++----------------- tox.ini | 21 +++++++++++++++++++++ 5 files changed, 51 insertions(+), 24 deletions(-) create mode 100644 .travis.yml create mode 100644 tox.ini diff --git a/.gitignore b/.gitignore index 26d2188..437d6e9 100644 --- a/.gitignore +++ b/.gitignore @@ -106,13 +106,8 @@ ENV/ # ML Model model/ -# grpc file -drucker_pb2.py -drucker_pb2_grpc.py - # sqlite -drucker.sqlite3 -drucker.test.sqlite3 +*.sqlite3 # Mac OS temporary file .DS_Store \ No newline at end of file diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..55f3edf --- /dev/null +++ b/.travis.yml @@ -0,0 +1,23 @@ +# ref: https://docs.travis-ci.com/user/languages/python +language: python +dist: trusty +sudo: true +services: +- docker + +matrix: + include: + - python: 3.4 + env: TOXENV=py34 + - python: 3.5 + env: TOXENV=py35 + - python: 3.6 + env: TOXENV=py36 + - python: 3.6 + env: TOXENV=coverage,codecov + +install: +- pip install tox + +script: +- tox \ No newline at end of file diff --git a/drucker/test/__init__.py b/drucker/test/__init__.py index dba9295..1440d93 100644 --- a/drucker/test/__init__.py +++ b/drucker/test/__init__.py @@ -66,4 +66,4 @@ def inner_method(*args, **kwargs): new=Mock(return_value=_prediction_value_map[output_type])) as _: return func(*args, **kwargs) return inner_method - return test_method \ No newline at end of file + return test_method diff --git a/test-requirements.txt b/test-requirements.txt index 795b816..a387c67 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -1,17 +1,5 @@ -grpcio==1.13.0 -grpcio-tools==1.13.0 -python-json-logger==0.1.9 -fluent-logger==0.9.3 -Flask==1.0.2 -PyYAML==3.12 -SQLAlchemy==1.2.7 -flask-sqlalchemy==2.3.2 -kubernetes==v5.0.0 -Flask-Testing==0.7.1 -Flask-Cors==3.0.4 -flask-restplus==0.11.0 -PyJWT==1.6.4 -Flask-JWT-Simple==0.0.3 -python-ldap==3.1.0 -numpy==1.14.3 -PyMySQL==0.8.0 +coverage==4.5.1 +nose==1.3.7 +pytest +py==1.5.4 +codecov==2.0.15 diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000..077d2d0 --- /dev/null +++ b/tox.ini @@ -0,0 +1,21 @@ +[tox] +envlist = py34, py35, py36 + +[testenv] +passenv = TOXENV CI TRAVIS TRAVIS_* +usedevelop = True +install_command = pip install -U {opts} {packages} +deps = -r{toxinidir}/test-requirements.txt + -r{toxinidir}/requirements.txt +commands = + python -V + py.test -vvv -s + +[testenv:coverage] +commands = + python -V + nosetests --with-coverage --cover-package=drucker --cover-tests + +[testenv:codecov] +commands = + codecov From 16a23ab61f7ec781f653bd513323e214b4ef1414 Mon Sep 17 00:00:00 2001 From: Hattori Keigo Date: Tue, 16 Oct 2018 16:29:47 +0900 Subject: [PATCH 5/9] Add example --- template/app.py | 12 +++++------- template/server.py | 26 +++++++++++--------------- template/settings.yml | 3 +++ template/start.sh | 9 ++------- 4 files changed, 21 insertions(+), 29 deletions(-) diff --git a/template/app.py b/template/app.py index ecb7d88..d447506 100644 --- a/template/app.py +++ b/template/app.py @@ -27,10 +27,10 @@ def joblib_load_from_zip(zip_name: str, file_name: str): ### Expansion end. -class Predict(Drucker): +class MyApp(Drucker): def __init__(self, config_file: str = None): super().__init__(config_file) - self.logger = JsonSystemLogger(self) + self.logger = JsonSystemLogger(self.config) self.load_model() def set_type(self, type_input: Enum, type_output: Enum) -> None: @@ -53,16 +53,14 @@ def load_model(self) -> None: # FIXME: This is an example. Implement HERE! self.predictor = joblib.load(self.model_path) # FIXME: This is Another example. You can use archived file if your algorithm requires some files. - # MODEL_NAME = "20180206" - # zip_name = MODEL_HOME + MODEL_NAME + ".zip" - # file_name = MODEL_NAME+'/default.model' - # self.predictor = joblib_load_from_zip(zip_name, file_name) + # file_name = 'default.model' + # self.predictor = joblib_load_from_zip(self.model_path, file_name) except Exception as e: self.logger.error(str(e)) self.logger.error(traceback.format_exc()) self.predictor = None - if not SERVICE_FIRST_BOOT: + if not self.is_first_boot(): # noinspection PyProtectedMember os._exit(-1) diff --git a/template/server.py b/template/server.py index c907b32..3b1cc1f 100644 --- a/template/server.py +++ b/template/server.py @@ -6,39 +6,35 @@ import pathlib -root_path = pathlib.Path(os.path.abspath(__file__)).parent +root_path = pathlib.Path(os.path.abspath(__file__)).parent.parent sys.path.append(str(root_path)) -working_path = pathlib.Path(root_path, 'drucker') -sys.path.append(str(working_path)) from concurrent import futures import grpc -import drucker_pb2_grpc import time -from drucker.core.drucker_dashboard_servicer import DruckerDashboardServicer -from drucker.core.drucker_worker_servicer import DruckerWorkerServicer -from drucker.logger.logger_jsonlogger import SystemLogger, ServiceLogger -from predict import Predict -from drucker.utils.env_loader import SERVICE_LEVEL_ENUM, APPLICATION_NAME, SERVICE_PORT +from drucker import DruckerDashboardServicer, DruckerWorkerServicer +from drucker.logger import JsonSystemLogger, JsonServiceLogger +from drucker.protobuf import drucker_pb2_grpc +from template.app import MyApp _ONE_DAY_IN_SECONDS = 60 * 60 * 24 def serve(): - system_logger = SystemLogger(logger_name="drucker", app_name=APPLICATION_NAME, app_env=SERVICE_LEVEL_ENUM) - service_logger = ServiceLogger(app_name=APPLICATION_NAME, app_env=SERVICE_LEVEL_ENUM) - predictor = Predict() + app = MyApp("./settings.yml") + system_logger = JsonSystemLogger(app.config) + service_logger = JsonServiceLogger(app.config) system_logger.info("Wake-up drucker worker.") server = grpc.server(futures.ThreadPoolExecutor(max_workers=10)) drucker_pb2_grpc.add_DruckerDashboardServicer_to_server( - DruckerDashboardServicer(logger=system_logger, predictor=predictor), server) + DruckerDashboardServicer(logger=system_logger, app=app), server) drucker_pb2_grpc.add_DruckerWorkerServicer_to_server( - DruckerWorkerServicer(logger=service_logger, predictor=predictor), server) - server.add_insecure_port("[::]:{0}".format(SERVICE_PORT)) + DruckerWorkerServicer(logger=service_logger, app=app), server) + server.add_insecure_port("[::]:{0}".format(app.config.SERVICE_PORT)) server.start() try: while True: diff --git a/template/settings.yml b/template/settings.yml index bb6d2ab..3c077d0 100644 --- a/template/settings.yml +++ b/template/settings.yml @@ -1,6 +1,9 @@ # (Mainly) For non-kubernetes users. # Define the parameters below. +# Debug flag +test: True + # This must be unique. # You can also determine an environment variable "DRUCKER_APPLICATION_NAME". # Priority is... diff --git a/template/start.sh b/template/start.sh index a9148c6..1840ef4 100755 --- a/template/start.sh +++ b/template/start.sh @@ -7,10 +7,5 @@ set -u echo "$ECHO_PREFIX Start.." -pip install -r ./drucker-grpc-proto/requirements.txt -sh ./drucker-grpc-proto/run_codegen.sh - -pip install -r ./drucker/requirements.txt - -pip install -r requirements.txt -python server.py +pip install -r ../requirements.txt +python ./server.py From 6ed7019edab2a8372e6389ac23ecd8caccf26a1f Mon Sep 17 00:00:00 2001 From: Hattori Keigo Date: Tue, 16 Oct 2018 16:30:05 +0900 Subject: [PATCH 6/9] Add release script for pypi --- scripts/release.sh | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 scripts/release.sh diff --git a/scripts/release.sh b/scripts/release.sh new file mode 100644 index 0000000..f0b256a --- /dev/null +++ b/scripts/release.sh @@ -0,0 +1,6 @@ +#!/usr/bin/env bash + +echo 'git clean -xdf' +echo 'python setup.py sdist' +echo 'python setup.py bdist_wheel --universal' +echo 'twine upload dist/* -r https://upload.pypi.org/legacy/ -u drucker' From b6f868d86d90830824b6f2a3f5747378958eea67 Mon Sep 17 00:00:00 2001 From: Hattori Keigo Date: Tue, 16 Oct 2018 16:30:16 +0900 Subject: [PATCH 7/9] Update readme --- README.md | 174 +++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 127 insertions(+), 47 deletions(-) diff --git a/README.md b/README.md index 349249e..02788fc 100644 --- a/README.md +++ b/README.md @@ -10,52 +10,83 @@ https://github.com/drucker/drucker-parent - [Drucker-client](https://github.com/drucker/drucker-client): SDK for accessing a drucker service. - [Drucker-example](https://github.com/drucker/drucker-example): Example of how to use drucker. -## Example -Example is available [here](https://github.com/drucker/drucker-example). +## Installation +From source: -## Procedures -### Git Submodule Add ``` -$ git submodule add https://github.com/drucker/drucker.git drucker -$ git submodule add https://github.com/drucker/drucker-grpc-proto.git drucker-grpc-proto -$ cp ./drucker/template/settings.yml . -$ cp ./drucker/template/predict.py . -$ cp ./drucker/template/server.py . -$ cp ./drucker/template/start.sh . +git clone --recursive https://github.com/drucker/drucker.git +cd drucker +python setup.py install ``` -### When update comes +From [PyPi](https://pypi.org/project/drucker/) directly: + ``` -$ git submodule update --recursive +pip install drucker ``` -Check the files above and if they had updates, merge them to your files. +## Example +Example is available [here](https://github.com/drucker/drucker-example). -### Edit settings.yml -``` -$ vi settings.yml -``` +### Create settings.yml (Not necessary) +Write your server configurations. The spec details are [here](./template/settings.yml) -### Edit predict.py -``` -$ vi predict.py -``` +### Create app.py +Implement `Drucker` class. -Write the following methods. +Necessity methods are following. #### load_model -Loading ML model to your ML module. This method is called on the wakeup or switch model. +ML model loading method. + +```python +def load_model(self) -> None: + try: + self.predictor = joblib.load(self.model_path) + except Exception as e: + self.logger.error(str(e)) + self.logger.error(traceback.format_exc()) + self.predictor = None + if not self.is_first_boot(): + os._exit(-1) +``` + +If you need to load more than two files to your ML module, you need to create a compressed file which includes the files it requires. You can load the file like the below. + +```python +def joblib_load_from_zip(self, zip_name: str, file_name: str): + with zipfile.ZipFile(zip_name, 'r') as zf: + with zf.open(file_name, 'r') as zipmodel: + return joblib.load(io.BufferedReader(io.BytesIO(zipmodel.read()))) + +def load_model(self) -> None: + try: + file_name = 'default.model' + self.predictor = self.joblib_load_from_zip(self.model_path, file_name) + except Exception as e: + self.logger.error(str(e)) + self.logger.error(traceback.format_exc()) + self.predictor = None + if not self.is_first_boot(): + os._exit(-1) +``` -Argument `model_path` is the path of a ML model. You can load the model like this. +#### predict +Predicting/inferring method. +```python +def predict(self, input: PredictLabel, option: dict = None) -> PredictResult: + try: + label_predict = self.predictor.predict( + np.array([input], dtype='float64')).tolist() + return PredictResult(label_predict, [1] * len(label_predict), option={}) + except Exception as e: + self.logger.error(str(e)) + self.logger.error(traceback.format_exc()) + raise e ``` -self.predictor = joblib.load(model_path) -``` - -We recommend the architecture of "1 Drucker loads 1 file" but sometimes your module needs a several files to load. In that case you need to create a compressed file including the files it requires. `model_path` will be your compressed file and you decompress it by yourself. -#### predict -Predicting or inferencing from the input. The definitions of input or output are described below. `bytes` can be a byte data of a file. +Input/output specs are below. ##### Input format *V* is the length of feature vector. @@ -81,7 +112,9 @@ The "option" field needs to be a json format. Any style is Ok but we have some r |option |string |Option field. Must be json format. | #### evaluate (TODO) -Evaluating the precision, recall and f1value of your ML model. The definitions of input or output are described below. +Evaluating method. + +This method is under construction. ##### Input format |Field |Type |Description | @@ -99,36 +132,83 @@ Evaluating the precision, recall and f1value of your ML model. The definitions o |recall
(required) |double[*N*][*M*] |Recall. | |fvalue
(required) |double[*N*][*M*] |F1 value. | -### Edit server.py -``` -$ vi server.py -``` +### Create server.py +Create a boot script. -Since `drucker_pb2_grpc` is automatically generated from `drucker.proto`, you don't need to care about it. You need to implement the interface class of `SystemLoggerInterface` and `ServiceLoggerInterface` if you customize the log output. +```python +from concurrent import futures +import grpc +import time -### Edit start.sh -``` -$ vi start.sh -``` +from drucker import DruckerDashboardServicer, DruckerWorkerServicer +from drucker.logger import JsonSystemLogger, JsonServiceLogger +from drucker.protobuf import drucker_pb2_grpc +from app import MyApp + +_ONE_DAY_IN_SECONDS = 60 * 60 * 24 + + +def serve(): + app = MyApp("./settings.yml") + system_logger = JsonSystemLogger(app.config) + service_logger = JsonServiceLogger(app.config) + system_logger.info("Wake-up drucker worker.") -Write the necessity script to boot your Drucker service. Minimum requirement is below. + server = grpc.server(futures.ThreadPoolExecutor(max_workers=10)) + drucker_pb2_grpc.add_DruckerDashboardServicer_to_server( + DruckerDashboardServicer(logger=system_logger, app=app), server) + drucker_pb2_grpc.add_DruckerWorkerServicer_to_server( + DruckerWorkerServicer(logger=service_logger, app=app), server) + server.add_insecure_port("[::]:{0}".format(app.config.SERVICE_PORT)) + server.start() + try: + while True: + time.sleep(_ONE_DAY_IN_SECONDS) + except KeyboardInterrupt: + system_logger.info("Shutdown drucker worker.") + server.stop(0) + + +if __name__ == '__main__': + serve() ``` -pip install -r ./drucker-grpc-proto/requirements.txt -sh ./drucker-grpc-proto/run_codegen.sh -python server.py +### Create logger (Not necessary) +If you want to use your own format logger, please implement the drucker [logger interface class](./drucker/logger/logger_interface.py). + +### Create start.sh +Create a boot script. + +```sh +#!/usr/bin/env bash + +ECHO_PREFIX="[drucker example]: " + +set -e +set -u + +echo "$ECHO_PREFIX Start.." + +pip install -r requirements.txt +python ./server.py + ``` -### Run it! +### Run ``` $ sh start.sh ``` +### Test +``` +$ python -m unittest drucker/test/test_worker_servicer.py +``` + ## Drucker on Kubernetes -Drucker dashboard makes it easy to launch Drucker service on Kubernetes. +Drucker can be run on Kubernetes and can be managed by Drucker dashboard. -You must read followings. +You must read the followings. 1. https://github.com/drucker/drucker-parent/tree/master/docs/Installation.md 1. https://github.com/drucker/drucker-dashboard/README.md From af4ce9cb45122d7a569061e137eab3423f9e2a42 Mon Sep 17 00:00:00 2001 From: Hattori Keigo Date: Tue, 16 Oct 2018 17:45:09 +0900 Subject: [PATCH 8/9] Change submodule head --- drucker/grpc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drucker/grpc b/drucker/grpc index 26ac8fd..5e57f2f 160000 --- a/drucker/grpc +++ b/drucker/grpc @@ -1 +1 @@ -Subproject commit 26ac8fdd499b29e9652df377661c9de66374fc98 +Subproject commit 5e57f2f4c30576e705acd61d851ce8a4045ce036 From 5f33b1b983ceef83d773620d29efd6048164fbd3 Mon Sep 17 00:00:00 2001 From: Hattori Keigo Date: Wed, 17 Oct 2018 10:31:16 +0900 Subject: [PATCH 9/9] Fix typo --- drucker/logger/logger_fluent.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drucker/logger/logger_fluent.py b/drucker/logger/logger_fluent.py index 57fccaa..98cca4e 100644 --- a/drucker/logger/logger_fluent.py +++ b/drucker/logger/logger_fluent.py @@ -33,7 +33,7 @@ def __init__(self, config: DruckerConfig = None) -> None: def init_app(self, config: DruckerConfig): self.config = config - app_name = config.APPLICATION_NAM + app_name = config.APPLICATION_NAME app_env = config.SERVICE_LEVEL_ENUM.value log_level = logging.NOTSET self.log.addHandler(self.__init_fluent_handler(app_name, app_env, log_level))