Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Splitter module next step #2

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions azureml-designer-modules/entries/sar_score.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import argparse

from azureml.studio.core.data_frame_schema import DataFrameSchema
from azureml.studio.core.io.data_frame_directory import load_model_from_directory, \
load_data_frame_from_directory, save_data_frame_to_directory


def get_args():
    """Parse the command-line arguments of the SAR scoring entry script.

    Returns:
        argparse.Namespace with attributes ``model``, ``users``,
        ``item_count_to_recommend`` and ``output``. Unrecognized arguments
        (e.g. ones injected by the AzureML runtime) are silently ignored.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument(
        '--model',
        help='The SAR model.',
    )

    # BUG FIX: this argument receives a directory path (it is fed to
    # load_data_frame_from_directory below); the original `type=float`
    # would make argparse reject every real path.
    parser.add_argument(
        '--users', type=str,
        help='Users to recommend for.',
    )

    # BUG FIX: this value is passed as `top_k` to recommend_k_items and the
    # matching YAML spec declares it as an Int; the original `type=str`
    # delivered a string instead of an integer.
    parser.add_argument(
        '--item-count-to-recommend', type=int,
        help='Recommend this number of items.',
    )

    parser.add_argument(
        '--output',
        help='The recommended items.',
    )

    # parse_known_args (not parse_args): tolerate extra runtime-injected flags.
    known_args, _ = parser.parse_known_args()
    return known_args


# Entry point: load a trained SAR model and a users dataframe from their
# AzureML directory formats, score top-k recommendations, and persist them.
if __name__ == '__main__':
    args = get_args()

    # NOTE(review): load_model_from_directory is imported from
    # azureml.studio.core.io.data_frame_directory above -- confirm it
    # actually lives in that module and not in a model_directory module.
    model = load_model_from_directory(args.model).model
    # .data is the pandas DataFrame payload of the DataFrameDirectory.
    input_df = load_data_frame_from_directory(args.users).data

    # Recommend the top-k items for each user in the input dataframe.
    # NOTE(review): args.item_count_to_recommend is parsed with type=str in
    # get_args; recommend_k_items presumably expects an int top_k -- confirm.
    result = model.recommend_k_items(
        input_df,
        top_k=args.item_count_to_recommend,
    )

    # Save the recommendations together with a schema inferred from the
    # result dataframe itself.
    save_data_frame_to_directory(
        args.output,
        result,
        schema=DataFrameSchema.data_frame_to_dict(result),
    )
93 changes: 93 additions & 0 deletions azureml-designer-modules/entries/stratified_splitter_entry.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
import argparse

from azureml.studio.core.logger import module_logger as logger
from reco_utils.dataset.python_splitters import python_stratified_split
from azureml.studio.core.data_frame_schema import DataFrameSchema
from azureml.studio.core.io.data_frame_directory import load_data_frame_from_directory, save_data_frame_to_directory


def get_args():
    """Parse the command-line arguments of the stratified-splitter entry.

    Unrecognized arguments (e.g. ones injected by the AzureML runtime)
    are ignored. Returns an argparse.Namespace.
    """
    # TODO: this function should be replaced by a function in SDK
    # which can automatically generate the args object according to the YAML spec file.
    parser = argparse.ArgumentParser()

    # One (flag, options) entry per parameter declared in the YAML spec.
    arg_table = (
        ('--input-path', {'help': 'The input directory.'}),
        ('--ratio', {'type': float, 'help': 'A float parameter.'}),
        ('--col-user', {'type': str, 'help': 'A string parameter.'}),
        ('--col-item', {'type': str, 'help': 'A string parameter.'}),
        ('--seed', {'type': int, 'help': 'An int parameter.'}),
        ('--output-train', {'help': 'The output training data directory.'}),
        ('--output-test', {'help': 'The output test data directory.'}),
    )
    for flag, options in arg_table:
        parser.add_argument(flag, **options)

    parsed, _ = parser.parse_known_args()
    return parsed


# Entry point: load a dataframe from an AzureML DataFrameDirectory, split it
# into train/test partitions stratified per user, and save both partitions.
if __name__ == '__main__':
    args = get_args()

    # .data is the pandas DataFrame payload of the DataFrameDirectory.
    input_df = load_data_frame_from_directory(args.input_path).data

    ratio = args.ratio
    col_user = args.col_user
    col_item = args.col_item
    seed = args.seed

    # Echo every received parameter into the run logs for debuggability.
    logger.debug(f"Received parameters:")
    logger.debug(f"Input path: {args.input_path}")
    logger.debug(f"Ratio: {ratio}")
    logger.debug(f"User column: {col_user}")
    logger.debug(f"Item column: {col_item}")
    logger.debug(f"Seed: {seed}")

    logger.debug(f"Shape of loaded DataFrame: {input_df.shape}")
    logger.debug(f"Cols of DataFrame: {input_df.columns}")

    # Stratified split from reco_utils: each user's ratings are split so
    # roughly `ratio` of them land in the training partition.
    output_train, output_test = python_stratified_split(
        input_df,
        ratio=args.ratio,
        col_user=args.col_user,
        col_item=args.col_item,
        seed=args.seed,
    )

    logger.debug(f"Output data path: {args.output_train}")
    logger.debug(f"Output test path: {args.output_test}")

    # Persist each partition with a schema inferred from the partition itself.
    save_data_frame_to_directory(
        args.output_train,
        output_train,
        schema=DataFrameSchema.data_frame_to_dict(output_train),
    )
    save_data_frame_to_directory(
        args.output_test,
        output_test,
        schema=DataFrameSchema.data_frame_to_dict(output_test),
    )

39 changes: 39 additions & 0 deletions azureml-designer-modules/module_specs/sar_score.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
name: SAR Recommend
# id is not needed in the future. will be calculated from name using uuid5.
id: efd1af54-0d31-42e1-b3d5-ce3b7c538706
version: 0.0.1
category: Experimentation
description: |
  Python SAR Recommender
repo: https://github.com/Microsoft/Recommenders/tree/master/
inputs:
- name: SAR Model
  type: ModelDirectory
  description: The directory contains SAR model.
  port: true
- name: Users
  type: DataFrameDirectory
  description: Users to recommend for.
  port: true
- name: Item count to recommend
  type: Int
  description: Recommend this number of items.
  min: 1
  default: 10
outputs:
- name: Output
  type: DataFrameDirectory
  description: The recommended items.
implementation:
  container:
    conda: reco_base.yaml
    command: [
      python,
      azureml-designer-modules/entries/sar_score.py
    ]
    args: [
      --model, {inputPath: SAR Model},
      # FIX: Users is a DataFrameDirectory port, so it must be wired as a
      # directory path (inputPath), not a literal value -- consistent with
      # SAR Model above and with the stratified_splitter spec's Input path.
      --users, {inputPath: Users},
      --item-count-to-recommend, {inputValue: Item count to recommend},
      # FIX: Output is declared under `outputs`, so it must be wired with
      # outputPath (as the stratified_splitter spec does), not inputValue.
      --output, {outputPath: Output},
    ]
56 changes: 56 additions & 0 deletions azureml-designer-modules/module_specs/stratified_splitter.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Module spec for the stratified-splitter AzureML designer module.
# Wires the inputs/outputs below to the command-line flags parsed by
# azureml-designer-modules/entries/stratified_splitter_entry.py.
name: Stratified Splitter
# id is not needed in the future. will be calculated from name using uuid5.
id: efd1af54-0d31-42e1-b3d5-ce3b7c538705
version: 0.0.9
category: Experimentation
description: |
  Python stratified splitter from CAT Recommender
repo: https://github.com/Microsoft/Recommenders/tree/master/
inputs:
# port: true marks a dataset connection (passed as a directory path);
# the remaining inputs are plain parameter values.
- name: Input path
  type: DataFrameDirectory
  description: The directory contains dataframe.
  port: true
- name: Ratio
  type: Float
  optional: True
  description: |
    Ratio for splitting data.
    * If it is a single float number, it splits data into two halves and the ratio argument indicates the ratio of training data set.
    * If it is a list of float numbers, the splitter splits data into several portions corresponding to the split ratios.
    * If a list is provided and the ratios are not summed to 1, they will be normalized.
- name: User column
  type: String
  description: Column name of user IDs.
- name: Item column
  type: String
  description: Column name of item IDs.
- name: Seed
  type: Int
  min: 1
  max: 100
  default: 42
  description: Seed.
outputs:
- name: Output train data
  type: DataFrameDirectory
  description: The output directory contains a training dataframe.
- name: Output test data
  type: DataFrameDirectory
  description: The output directory contains a test dataframe.
implementation:
  container:
    conda: reco_base.yaml
    command: [
      python,
      azureml-designer-modules/entries/stratified_splitter_entry.py
    ]
    # inputPath for the dataset port, inputValue for scalar parameters,
    # outputPath for the two output directories.
    args: [
      --input-path, {inputPath: Input path},
      --ratio, {inputValue: Ratio},
      --col-user, {inputValue: User column},
      --col-item, {inputValue: Item column},
      --seed, {inputValue: Seed},
      --output-train, {outputPath: Output train data},
      --output-test, {outputPath: Output test data},
    ]
6 changes: 6 additions & 0 deletions scripts/generate_conda_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@

PIP_GPU = {"nvidia-ml-py3": "nvidia-ml-py3>=7.352.0"}
PIP_PYSPARK = {"databricks-cli": "databricks-cli==0.8.6"}
PIP_AZUREML_MODULES = {"azureml-designer-core": "azureml-designer-core==0.0.26"}

PIP_DARWIN = {
"nni": "nni==0.5.2.1.1",
Expand Down Expand Up @@ -116,6 +117,9 @@
parser.add_argument(
"--pyspark-version", help="provide specific version of PySpark to use"
)
parser.add_argument(
"--azureml-modules", action="store_true", help="run as azureml designer modules"
)
args = parser.parse_args()

# check pyspark version
Expand Down Expand Up @@ -154,6 +158,8 @@
if args.gpu:
conda_packages.update(CONDA_GPU)
pip_packages.update(PIP_GPU)
if args.azureml_modules:
pip_packages.update(PIP_AZUREML_MODULES)

# check for os platform support
if platform == 'darwin':
Expand Down