0.4 #1

charlesll · Aug 28, 2018 · 20cd08e · 20cd08e
1 parent 46bb7d4
commit 20cd08e
Show file tree

Hide file tree

Showing 3 changed files with 8 additions and 5 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -8,12 +8,14 @@ Licence: see LICENCE.md
 
 As Rampy starts to grow, I will summarise changes in this file starting at version 0.2.6
 
-# 0.3.7
+# 0.4
 
 - BREAKING CHANGE: mlregressor is now a class and no longer a function. You can pass sklearn arguments directly to the algorithms through dictionaries.
 The use of the class simplifies the use of mlregressor, as the created object saves everything!
 It also makes it very easy to change the algorithm and try something else.
 
+- rampy.chemical_splitting() allows one to select the random seed.
+
 - addition of tests and examples of the mlregressor() class and of resample() and flipsp() functions.
 
 - Correction of the rp.mixing_sp() function, rampy is now compatible with cvxpy v1.0.

diff --git a/rampy/ml_regressor.py b/rampy/ml_regressor.py
@@ -8,7 +8,7 @@
 import pandas as pd
 import numpy as np
 
-def chemical_splitting(Pandas_DataFrame, target,split_fraction):
+def chemical_splitting(Pandas_DataFrame, target, split_fraction =0.30, rand_state=42):
     """split datasets depending on their chemistry
 
         Parameters
@@ -19,7 +19,8 @@ def chemical_splitting(Pandas_DataFrame, target,split_fraction):
             The target in the DataFrame according to which we will split the dataset
         split_fraction: a float number between 0 and 1
             This is the amount of splitting you want, in reference to the second output dataset (see OUTPUTS).
-
+        rand_state : int
+            The random seed that is used for reproducibility of the results. Default = 42.
         Returns
         =======
             frame1 : A Pandas DataFrame
@@ -41,7 +42,7 @@ def chemical_splitting(Pandas_DataFrame, target,split_fraction):
     names_idx = np.arange(len(names))
 
     # getting index for the frames with the help of scikitlearn
-    frame1_idx, frame2_idx = model_selection.train_test_split(names_idx, test_size = split_fraction,random_state=42)
+    frame1_idx, frame2_idx = model_selection.train_test_split(names_idx, test_size = split_fraction,random_state=rand_state)
 
     # and now grabbing the relevant pandas dataframes
     ttt = np.in1d(Pandas_DataFrame[target],names[frame1_idx])

diff --git a/setup.py b/setup.py
@@ -3,7 +3,7 @@
 from setuptools import setup, Extension
 
 setup(name='rampy',
-      version='0.3.7',
+      version='0.4.0',
       description='A Python module containing functions to treat spectroscopic (XANES, Raman, IR...) data',
       url='https://github.com/charlesll/rampy',
       author='Charles Le Losq',