Add MLlib logistic regression example in Python

witgo · Apr 15, 2014 · 1e1bd0f · 1e1bd0f
1 parent 88bc01f
commit 1e1bd0f
Showing 1 changed file with 50 additions and 0 deletions.
diff --git a/python/examples/mllib/logistic_regression.py b/python/examples/mllib/logistic_regression.py
@@ -0,0 +1,50 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+Logistic regression using MLlib.
+
+This example requires NumPy (http://www.numpy.org/).
+"""
+
+from math import exp
+import sys
+
+import numpy as np
+from pyspark import SparkContext
+from pyspark.mllib.regression import LabeledPoint
+from pyspark.mllib.classification import LogisticRegressionWithSGD
+
+
+# Parse a line of text into an MLlib LabeledPoint object
+def parsePoint(line):
+    values = [float(s) for s in line.split(' ')]
+    if values[0] == -1:   # Convert -1 labels to 0 for MLlib
+        values[0] = 0
+    return LabeledPoint(values[0], values[1:])
+
+
+if __name__ == "__main__":
+    if len(sys.argv) != 4:
+        print >> sys.stderr, "Usage: logistic_regression <master> <file> <iters>"
+        exit(-1)
+    sc = SparkContext(sys.argv[1], "PythonLR")
+    points = sc.textFile(sys.argv[2]).map(parsePoint)
+    iterations = int(sys.argv[3])
+    model = LogisticRegressionWithSGD.train(points, iterations)
+    print "Final weights: " + str(model.weights)
+    print "Final intercept: " + str(model.intercept)