diff --git a/.classpath b/.classpath new file mode 100644 index 0000000..1247f9f --- /dev/null +++ b/.classpath @@ -0,0 +1,27 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b3dd771 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +/target/ +/src/data.csv +/src/movies.txt diff --git a/.project b/.project new file mode 100644 index 0000000..3f6ba3a --- /dev/null +++ b/.project @@ -0,0 +1,23 @@ + + + big-data-exercises + + + + + + org.eclipse.jdt.core.javabuilder + + + + + org.eclipse.m2e.core.maven2Builder + + + + + + org.eclipse.jdt.core.javanature + org.eclipse.m2e.core.maven2Nature + + diff --git a/.settings/org.eclipse.core.resources.prefs b/.settings/org.eclipse.core.resources.prefs new file mode 100644 index 0000000..8dd9b1d --- /dev/null +++ b/.settings/org.eclipse.core.resources.prefs @@ -0,0 +1,3 @@ +eclipse.preferences.version=1 +encoding//src/test/java=UTF-8 +encoding/=UTF-8 diff --git a/.settings/org.eclipse.jdt.core.prefs b/.settings/org.eclipse.jdt.core.prefs new file mode 100644 index 0000000..b9a1035 --- /dev/null +++ b/.settings/org.eclipse.jdt.core.prefs @@ -0,0 +1,15 @@ +eclipse.preferences.version=1 +org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled +org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.7 +org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve +org.eclipse.jdt.core.compiler.compliance=1.7 +org.eclipse.jdt.core.compiler.debug.lineNumber=generate +org.eclipse.jdt.core.compiler.debug.localVariable=generate +org.eclipse.jdt.core.compiler.debug.sourceFile=generate +org.eclipse.jdt.core.compiler.problem.assertIdentifier=error +org.eclipse.jdt.core.compiler.problem.enablePreviewFeatures=disabled +org.eclipse.jdt.core.compiler.problem.enumIdentifier=error +org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning +org.eclipse.jdt.core.compiler.problem.reportPreviewFeatures=ignore +org.eclipse.jdt.core.compiler.release=disabled +org.eclipse.jdt.core.compiler.source=1.7 diff --git a/pom.xml b/pom.xml index 8169ff7..2199a0f 100644 --- a/pom.xml +++ b/pom.xml @@ -26,5 +26,15 @@ 4.7 test + + org.slf4j + slf4j-api + 1.7.5 + + + org.slf4j + slf4j-simple + 1.6.4 + diff --git a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java new file mode 100644 index 0000000..57bcf16 --- /dev/null +++ b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java @@ -0,0 +1,163 @@ +package nearsoft.academy.bigdata.recommendation; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Scanner; + +import org.apache.commons.collections.map.HashedMap; + +import org.apache.mahout.cf.taste.common.TasteException; +import org.apache.mahout.cf.taste.impl.model.file.FileDataModel; +import org.apache.mahout.cf.taste.impl.neighborhood.ThresholdUserNeighborhood; +import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender; +import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity; +import org.apache.mahout.cf.taste.model.DataModel; +import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood; +import org.apache.mahout.cf.taste.recommender.RecommendedItem; +import org.apache.mahout.cf.taste.recommender.UserBasedRecommender; +import org.apache.mahout.cf.taste.similarity.UserSimilarity; + +public class MovieRecommender { + // variables + private int totalReviews, totalProducts, totalUsers; + private HashMap mapUsers = new HashMap<>(); + private HashMap mapProducts = new HashMap<>(); + private DataModel dataModel; + private UserSimilarity similarity; + private UserNeighborhood neighborhood; + private UserBasedRecommender recommender; + private String inputPath, outputPath; + + //constructor + public MovieRecommender(String inputPath) throws IOException, TasteException { + this.outputPath = "src/data.csv"; + + this.inputPath = inputPath; + this.totalReviews = 0; + this.totalProducts = 0; + this.totalUsers = 0; + + this.loadData(); + + + this.dataModel = new FileDataModel(new File(this.outputPath)); + this.similarity = new PearsonCorrelationSimilarity(this.dataModel); + this.neighborhood = new ThresholdUserNeighborhood(0.1, this.similarity, this.dataModel); + this.recommender = new GenericUserBasedRecommender(this.dataModel, this.neighborhood, this.similarity); + + } + + public int getTotalReviews() { + return totalReviews; + } + + public int getTotalProducts() { + return totalProducts; + } + + public int getTotalUsers() { + return totalUsers; + } + + public void loadData() throws IOException { + BufferedReader br = new BufferedReader(new FileReader(this.inputPath)); + BufferedWriter bw = new BufferedWriter(new FileWriter("src/data.csv")); + String line, productId = "", userId = "", score = ""; + + //counters for translation + int currentIdUser = 0, currentIdProduct = 0; + + line = br.readLine(); + while(line != null) { + + if(line.contains("product/productId")) { + productId = line.substring(18).trim(); + if(!mapProducts.containsKey(productId)) { + currentIdProduct++; + mapProducts.put(productId, currentIdProduct); + // increase total products + this.totalProducts++; + } else { + currentIdProduct = mapProducts.get(productId); + } + // increase total reviews + this.totalReviews++; + } + + if(line.contains("review/userId")) { + userId = line.substring(14).trim(); + if(!mapUsers.containsKey(userId)) { + currentIdUser++; + mapUsers.put(userId, currentIdUser); + // increase total users + this.totalUsers++; + } else { + currentIdUser = mapUsers.get(userId); + } + } + + if(line.contains("review/score")) { + score = String.valueOf( (int) Float.parseFloat(line.substring(13).trim())); + bw.write(currentIdUser+","+currentIdProduct+","+score+"\n"); + } + + line = br.readLine(); + } + + br.close(); + bw.close(); + } + + public List getRecommendationsForUser(String userId) throws TasteException { + List list = new ArrayList(); + + int translatedId = mapUsers.get(userId); + List recommendations = recommender.recommend(translatedId, 3); + + for (RecommendedItem recommendation: recommendations) { + list.add(MapUtils.getKey(this.mapProducts, (int) recommendation.getItemID())); + } + + return list; + } + + /*public static void main(String[] args) throws IOException, TasteException { + MovieRecommender movieRecommender = new MovieRecommender("src/movies.txt"); + + System.out.println(" my reviews " + movieRecommender.getTotalReviews()); + System.out.println(" my products " + movieRecommender.getTotalProducts()); + System.out.println(" my users " + movieRecommender.getTotalUsers()); + + List recommendations = movieRecommender.getRecommendationsForUser("A141HP4LYPWMSR"); + for (String recommendation : recommendations) { + System.out.println(recommendation); + } + + + }*/ + +} + + +// method to get Map's key from value in java +class MapUtils { + public static K getKey(Map map, V value) { + for (K key : map.keySet()) { + if (value.equals(map.get(key))) { + return key; + } + } + return null; + } +} diff --git a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java index 0d0b1fe..106e2f7 100644 --- a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java +++ b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java @@ -15,7 +15,7 @@ public class MovieRecommenderTest { public void testDataInfo() throws IOException, TasteException { //download movies.txt.gz from // http://snap.stanford.edu/data/web-Movies.html - MovieRecommender recommender = new MovieRecommender("/path/to/movies.txt.gz"); + MovieRecommender recommender = new MovieRecommender("src/movies.txt"); assertEquals(7911684, recommender.getTotalReviews()); assertEquals(253059, recommender.getTotalProducts()); assertEquals(889176, recommender.getTotalUsers());