Skip to content

Commit

Permalink
add separate compilation for feature engineering
Browse files Browse the repository at this point in the history
  • Loading branch information
Sprate committed Dec 3, 2021
1 parent b110ae5 commit 15bc66d
Show file tree
Hide file tree
Showing 23 changed files with 275 additions and 102 deletions.
5 changes: 1 addition & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ if (USE_ABY3_TRUNC1)
endif(USE_ABY3_TRUNC1)

add_subdirectory(core/common)
add_subdirectory(feature/he)
add_subdirectory(core/he)
if (NOT WITH_GPU)
add_subdirectory(core/privc)
endif()
Expand Down Expand Up @@ -171,9 +171,6 @@ install(DIRECTORY "${THIRD_PARTY_PATH}/install/openssl/lib/"
install(TARGETS paddle_enc mpc_data_utils
LIBRARY DESTINATION ${PADDLE_ENCRYPTED_LIB_PATH}
LIBRARY DESTINATION ${PADDLE_ENCRYPTED_LIB_PATH})

set(FEATURE_LIB "${CMAKE_SOURCE_DIR}/feature/python/libs")
install(TARGETS he_utils LIBRARY DESTINATION ${FEATURE_LIB})

if (WITH_PSI)
install(TARGETS psi LIBRARY DESTINATION ${PADDLE_ENCRYPTED_LIB_PATH})
Expand Down
3 changes: 3 additions & 0 deletions feature/he/CMakeLists.txt → core/he/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,6 @@ include_directories(${PYTHON_INCLUDE_DIRS})
# Build he_utils as a MODULE library (a loadable plugin, not linked into
# other targets) from the sources listed in PYBIND_HE_SRCS.
add_library(he_utils MODULE ${PYBIND_HE_SRCS})
# Link pybind, gmp and gmpxx privately: they are needed to build the module
# and are not propagated to anything else.
target_link_libraries(he_utils PRIVATE pybind gmp gmpxx)
# Replace the default library file-name prefix with PYTHON_MODULE_PREFIX.
# NOTE(review): PYTHON_MODULE_PREFIX is defined outside this hunk, presumably by
# the pybind CMake support -- confirm.
set_target_properties(he_utils PROPERTIES PREFIX "${PYTHON_MODULE_PREFIX}")

# Install the built module into the feature_engineering Python package inside
# the source tree (python/paddle_fl/feature_engineering/libs).
set(FEATURE_LIB "${CMAKE_SOURCE_DIR}/python/paddle_fl/feature_engineering/libs")
install(TARGETS he_utils LIBRARY DESTINATION ${FEATURE_LIB})
File renamed without changes.
File renamed without changes.
File renamed without changes.
54 changes: 0 additions & 54 deletions feature/python/example/README.md

This file was deleted.

101 changes: 101 additions & 0 deletions python/paddle_fl/feature_engineering/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
## 联邦特征工程

支持计算正样本占比、woe、iv

## 单独编译

### 环境准备
* CentOS 7 (64 bit) or Ubuntu 16.04
* Python 3.5/3.6/3.7 (64 bit) or above
* pip3 9.0.1+ (64 bit)
* GCC or G++ 8.2.0+
* cmake 3.15+
* grpcio
* grpcio-tools


### 克隆源码并安装

1.获取源代码
```sh
git clone https://github.com/PaddlePaddle/PaddleFL
cd /path/to/PaddleFL
mkdir build && cd build
```

2.执行部分编译指令(参照 docs/source/md/compile_and_install_cn.md )

```
cmake .. -DCMAKE_C_COMPILER=${gcc_path} -DCMAKE_CXX_COMPILER=${g++_path} -DPYTHON_EXECUTABLE=${PYTHON_EXECUTABLE} -DPYTHON_INCLUDE_DIRS=${PYTHON_INCLUDE_DIRS} -DBUILD_PADDLE_FROM_SOURCE=ON -DWITH_GRPC=ON -DWITH_GPU=OFF
```

```
cd core/he
make -j48
make install
```

3.生成grpc_pb
```
cd /path/to/PaddleFL/python
python3 paddle_fl/feature_engineering/proto/run_protogen.py
```

4.pip打包并安装
```
cd /path/to/PaddleFL/python/paddle_fl
mkdir build && cd build
python3 ../feature_engineering/setup.py sdist bdist_wheel
pip3 install dist/paddle_fl_feature_engineering-1.2.0-py3-none-any.whl -U
```
## 跟随 PaddleFL 编译
即将支持。

## 测试

1.准备数据
```
cd /path/to/PaddleFL/python/paddle_fl/feature_engineering/example
python3 gen_test_file.py
```
简单测试使用 gen_simple_file,性能测试使用 gen_bench_file。

2.生成证书
grpc secure channel 需要证书,使用以下命令生成 grpc 证书:

```
openssl req -newkey rsa:2048 -nodes -keyout server.key -x509 -days 3650 -out server.crt
```
示例中将 Common Name 定义为 metrics_service,其余字段留空。

命令执行后会在 example 目录下生成 server.key 和 server.crt。

3.进行测试

服务器端:python3 metrics_test_server.py

客户端: python3 metrics_test_client.py

## 构建自己的程序

我们提供了pip打包支持,用户只需在自己的程序中 import paddle_fl.feature_engineering.core 即可,grpc通信模块可由用户自定义

示例如下:

channel: 用户自定义的 grpc client channel

server: 用户自定义的 grpc server

```
#client
from paddle_fl.feature_engineering.core.federated_feature_engineering_client import FederatedFeatureEngineeringClient
fed_fea_eng_client = FederatedFeatureEngineeringClient(1024)
fed_fea_eng_client.connect(channel)
result = fed_fea_eng_client.get_woe(labels)
#server
from paddle_fl.feature_engineering.core.federated_feature_engineering_server import FederatedFeatureEngineeringServer
fed_fea_eng_server = FederatedFeatureEngineeringServer()
fed_fea_eng_server.serve(server)
woe_list = fed_fea_eng_server.get_woe(features)
```
28 changes: 28 additions & 0 deletions python/paddle_fl/feature_engineering/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Import modules.
"""

import os
import subprocess
import sys
import sysconfig
import warnings

# Directory under site-packages ("purelib") that holds the he_utils extension
# module, and the full path of the shared object itself.
he_utils_path = os.path.join(
    sysconfig.get_paths()["purelib"], "paddle_fl", "feature_engineering", "libs")
he_utils_lib = os.path.join(he_utils_path, 'he_utils.so')

# Put the libs directory on sys.path so a plain `import he_utils` can find it.
sys.path.append(he_utils_path)

# Set the rpath of he_utils.so to its own directory. The command is passed as
# an argument list rather than a shell string, so a site-packages path that
# contains spaces or shell metacharacters is handled correctly. This step is
# best-effort: it is skipped when the library is not installed, and a missing
# patchelf binary produces a warning instead of failing the import.
if os.path.isfile(he_utils_lib):
    try:
        subprocess.call(
            ['patchelf', '--set-rpath', he_utils_path, he_utils_lib])
    except OSError as err:
        warnings.warn(
            'patchelf could not be run on {}: {}'.format(he_utils_lib, err))

from . import core

Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,9 @@
"""
init
"""
from .federal_feature_engineering_client import FederalFeatureEngineeringClient
from .federal_feature_engineering_server import FederalFeatureEngineeringServer

from .federated_feature_engineering_client import FederatedFeatureEngineeringClient
from .federated_feature_engineering_server import FederatedFeatureEngineeringServer

from .metrics_client import *
from .metrics_server import *
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,15 @@
# See the License for the specific language governing permissions and
# limitations under the License
"""
Federal feature engineering client-side
Federated feature engineering client-side
support postive_ratio, woe, iv, ks, auc
"""

import metrics_client as mc
from . import metrics_client as mc

class FederalFeatureEngineeringClient(object):
class FederatedFeatureEngineeringClient(object):
"""
Federal feature engineering client-side implementation
Federated feature engineering client-side implementation
"""
def __init__(self, key_len=1024):
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,16 @@
# See the License for the specific language governing permissions and
# limitations under the License
"""
Federal feature engineering server-side interface
Federated feature engineering server-side interface
support postive_ratio, woe, iv, ks, auc
"""

import threading
import metrics_server as ms
from . import metrics_server as ms

class FederalFeatureEngineeringServer(object):
class FederatedFeatureEngineeringServer(object):
"""
Federal feature engineering server-side implementation
Federated feature engineering server-side implementation
"""
def serve(self, server):
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,9 @@
import numpy as np
import grpc

import sys
sys.path.append("../libs")

import he_utils as hu
import metrics_pb2
import metrics_pb2_grpc
from ..proto import metrics_pb2_grpc
from ..proto import metrics_pb2

__all__ = [
'get_mpc_postive_ratio_alice',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,9 @@
import numpy as np
import grpc

import sys
sys.path.append("../libs")

import he_utils as hu
import metrics_pb2
import metrics_pb2_grpc
from ..proto import metrics_pb2
from ..proto import metrics_pb2_grpc


class MpcPositiveRatioServicer(metrics_pb2_grpc.MpcPositiveRatioServicer):
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,12 @@
mpc metrics test client-side
"""

import sys
sys.path.append("../core")

import time
import grpc
import gen_test_file
import metrics_plain

from federal_feature_engineering_client import FederalFeatureEngineeringClient
from paddle_fl.feature_engineering.core.federated_feature_engineering_client import FederatedFeatureEngineeringClient

SERVER_ADRESS = 'localhost:50051'

Expand All @@ -48,7 +45,7 @@ def postive_ratio_test_client(file_name):
"""
labels, features = gen_test_file.read_file(file_name)

fed_fea_eng_client = FederalFeatureEngineeringClient(1024)
fed_fea_eng_client = FederatedFeatureEngineeringClient(1024)
channel = gen_client_channel(SERVER_ADRESS)
fed_fea_eng_client.connect(channel)
result = fed_fea_eng_client.get_positive_ratio(labels)
Expand All @@ -63,7 +60,7 @@ def woe_test_client(file_name):
"""
labels, features = gen_test_file.read_file(file_name)

fed_fea_eng_client = FederalFeatureEngineeringClient(1024)
fed_fea_eng_client = FederatedFeatureEngineeringClient(1024)
channel = gen_client_channel(SERVER_ADRESS)
fed_fea_eng_client.connect(channel)
result = fed_fea_eng_client.get_woe(labels)
Expand All @@ -78,7 +75,7 @@ def iv_test_client(file_name):
"""
labels, features = gen_test_file.read_file(file_name)

fed_fea_eng_client = FederalFeatureEngineeringClient(1024)
fed_fea_eng_client = FederatedFeatureEngineeringClient(1024)
channel = gen_client_channel(SERVER_ADRESS)
fed_fea_eng_client.connect(channel)
result = fed_fea_eng_client.get_iv(labels)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,13 @@
mpc metrics test server-side
"""

import sys
sys.path.append("../core")

from concurrent import futures
import time
import grpc
import gen_test_file
import metrics_plain

from federal_feature_engineering_server import FederalFeatureEngineeringServer
from paddle_fl.feature_engineering.core.federated_feature_engineering_server import FederatedFeatureEngineeringServer

SERVER_ADRESS = 'localhost:50051'

Expand All @@ -51,7 +48,7 @@ def postive_ratio_test_server(file_name):
"""
labels, features = gen_test_file.read_file(file_name)
server = gen_server()
fed_fea_eng_server = FederalFeatureEngineeringServer()
fed_fea_eng_server = FederatedFeatureEngineeringServer()
fed_fea_eng_server.serve(server)
fed_fea_eng_server.get_positive_ratio(features)

Expand All @@ -62,7 +59,7 @@ def woe_test_server(file_name):
"""
labels, features = gen_test_file.read_file(file_name)
server = gen_server()
fed_fea_eng_server = FederalFeatureEngineeringServer()
fed_fea_eng_server = FederatedFeatureEngineeringServer()
fed_fea_eng_server.serve(server)
woe_list = fed_fea_eng_server.get_woe(features)
print("server woe is \n", woe_list)
Expand All @@ -74,7 +71,7 @@ def iv_test_server(file_name):
"""
labels, features = gen_test_file.read_file(file_name)
server = gen_server()
fed_fea_eng_server = FederalFeatureEngineeringServer()
fed_fea_eng_server = FederatedFeatureEngineeringServer()
fed_fea_eng_server.serve(server)
iv_list = fed_fea_eng_server.get_iv(features)
print("server iv is \n", iv_list)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,4 @@
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Import modules.
"""
from . import core
# limitations under the License
File renamed without changes.
Loading

0 comments on commit 15bc66d

Please sign in to comment.