Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Big data-Ernesto #87

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@

<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.source>1.7</maven.compiler.source>
<maven.compiler.target>1.7</maven.compiler.target>
</properties>

<dependencies>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package nearsoft.academy.bigdata.recommendation;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Hashtable;
import java.util.List;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.impl.neighborhood.ThresholdUserNeighborhood;
import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
import org.apache.mahout.cf.taste.recommender.UserBasedRecommender;
import org.apache.mahout.cf.taste.similarity.UserSimilarity;

/**
 * User-based movie recommender built on Apache Mahout's Taste API.
 *
 * <p>On construction the class parses a plain-text review dump made of
 * {@code key: value} lines. It looks at three keys only:
 * {@code product/productId}, {@code review/userId} and {@code review/score}.
 * Every distinct user and product string is mapped to a sequential numeric id
 * (starting at 1), and each score line is written as a
 * {@code userId,productId,score} row into an intermediate CSV file. That CSV is
 * then loaded into a {@link FileDataModel} and used to build a
 * {@link GenericUserBasedRecommender} with Pearson-correlation similarity and a
 * threshold neighbourhood.
 *
 * <p>Instances are not designed for concurrent use.
 *
 * @author Ernesto
 */
public class MovieRecommender {

    /** Intermediate CSV ({@code user,product,score}) that is handed to Mahout. */
    private static final String CSV_PATH =
            "src/test/java/nearsoft/academy/bigdata/recommendation/movies.csv";

    /** Separator between the key and the value on each line of the review dump. */
    private static final String KEY_VALUE_SEPARATOR = ": ";

    /** Minimum similarity for another user to be part of the neighbourhood. */
    private static final double NEIGHBORHOOD_THRESHOLD = 0.1;

    /** Number of items requested from the recommender per user. */
    private static final int MAX_RECOMMENDATIONS = 3;

    private int totalReviews = 0;
    private UserBasedRecommender recommender;

    /** External user id -> sequential numeric id (1-based). */
    private final HashMap<String, Integer> users = new HashMap<>();

    /** External product id -> sequential numeric id (1-based). */
    private final HashMap<String, Integer> products = new HashMap<>();

    /**
     * Reverse lookup for products: the product with numeric id {@code n} is stored
     * at index {@code n - 1}. Gives O(1) id-to-name resolution instead of scanning
     * the whole product map for every recommended item.
     */
    private final List<String> productNames = new ArrayList<>();

    /**
     * Parses the review dump at {@code path} and builds the recommender.
     *
     * @param path location of the plain-text review dump
     * @throws IOException    if the dump cannot be read or the CSV cannot be written
     * @throws TasteException if Mahout fails to build the model
     */
    public MovieRecommender(String path) throws IOException, TasteException {
        start(path);
    }

    /**
     * Converts the review dump into the intermediate CSV, collecting the user and
     * product id mappings on the way, and then builds the Mahout recommender.
     */
    private void start(String path) throws FileNotFoundException, IOException, TasteException {
        int currentUser = 0;
        int currentProduct = 0;

        // Both resources live in the same try-with-resources so that the writer is
        // closed (and flushed) even when parsing fails half-way through.
        try (BufferedReader reader = new BufferedReader(new FileReader(new File(path)));
             BufferedWriter writer = new BufferedWriter(new FileWriter(CSV_PATH))) {
            String line;
            while ((line = reader.readLine()) != null) {
                // indexOf/substring avoids compiling a regex for every line.
                int separatorAt = line.indexOf(KEY_VALUE_SEPARATOR);
                if (separatorAt < 0) {
                    continue; // blank line or a line without "key: value" shape
                }
                String key = line.substring(0, separatorAt);
                String value = line.substring(separatorAt + KEY_VALUE_SEPARATOR.length());
                if (value.isEmpty()) {
                    continue; // nothing usable; avoids ids/scores made of empty strings
                }

                switch (key) {
                    case "product/productId":
                        currentProduct = idForProduct(value);
                        break;
                    case "review/userId":
                        currentUser = idForUser(value);
                        break;
                    case "review/score":
                        // The score line is what completes a review: emit one CSV row
                        // using the most recently seen user and product ids.
                        writer.write(currentUser + "," + currentProduct + "," + value + "\n");
                        totalReviews++;
                        break;
                    default:
                        // Every other key is irrelevant for the recommender.
                        break;
                }
            }
        }

        DataModel model = new FileDataModel(new File(CSV_PATH));
        UserSimilarity similarity = new PearsonCorrelationSimilarity(model);
        UserNeighborhood group =
                new ThresholdUserNeighborhood(NEIGHBORHOOD_THRESHOLD, similarity, model);
        recommender = new GenericUserBasedRecommender(model, group, similarity);
    }

    /** Returns the numeric id of {@code productId}, assigning the next free id if it is new. */
    private int idForProduct(String productId) {
        Integer known = products.get(productId);
        if (known != null) {
            return known;
        }
        productNames.add(productId);
        int assigned = productNames.size(); // ids are 1-based
        products.put(productId, assigned);
        return assigned;
    }

    /** Returns the numeric id of {@code userId}, assigning the next free id if it is new. */
    private int idForUser(String userId) {
        Integer known = users.get(userId);
        if (known != null) {
            return known;
        }
        int assigned = users.size() + 1; // ids are 1-based
        users.put(userId, assigned);
        return assigned;
    }

    /** @return number of {@code review/score} lines that were read */
    public int getTotalReviews() {
        return totalReviews;
    }

    /** @return number of distinct product ids that were read */
    public int getTotalProducts() {
        return products.size();
    }

    /** @return number of distinct user ids that were read */
    public int getTotalUsers() {
        return users.size();
    }

    /**
     * Recommends up to three products for the given user.
     *
     * @param userId external user id as it appears in the review dump
     * @return the external product ids recommended for the user; empty when the
     *         user id was never seen in the dump
     * @throws IOException    declared for backward compatibility with existing callers
     * @throws TasteException if Mahout fails to compute the recommendations
     */
    public List<String> getRecommendationsForUser(String userId) throws IOException, TasteException {
        List<String> list = new ArrayList<>();
        Integer id = users.get(userId);
        if (id == null) {
            // Unknown user: previously this blew up with a NullPointerException on unboxing.
            return list;
        }
        List<RecommendedItem> recommendations =
                recommender.recommend(id.longValue(), MAX_RECOMMENDATIONS);
        for (RecommendedItem recommendation : recommendations) {
            list.add(getProductName((int) recommendation.getItemID()));
        }
        return list;
    }

    /**
     * Resolves a numeric product id back to the external product id.
     *
     * @param value numeric product id assigned while parsing (1-based)
     * @return the external product id, or {@code null} if no product has that id
     */
    public String getProductName(int value) {
        if (value < 1 || value > productNames.size()) {
            return null;
        }
        return productNames.get(value - 1);
    }
}
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
package nearsoft.academy.bigdata.recommendation;

import org.apache.mahout.cf.taste.common.TasteException;
import org.junit.Test;

import java.io.IOException;
import java.util.List;

import org.apache.mahout.cf.taste.common.TasteException;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertThat;
import static org.junit.matchers.JUnitMatchers.hasItem;
Expand All @@ -15,6 +13,8 @@ public class MovieRecommenderTest {
public void testDataInfo() throws IOException, TasteException {
//download movies.txt.gz from
// http://snap.stanford.edu/data/web-Movies.html
MovieRecommender recommender;

MovieRecommender recommender = new MovieRecommender("/path/to/movies.txt.gz");
assertEquals(7911684, recommender.getTotalReviews());
assertEquals(253059, recommender.getTotalProducts());
Expand All @@ -24,7 +24,5 @@ public void testDataInfo() throws IOException, TasteException {
assertThat(recommendations, hasItem("B0002O7Y8U"));
assertThat(recommendations, hasItem("B00004CQTF"));
assertThat(recommendations, hasItem("B000063W82"));

}

}
}
Binary file added target/big-data-1.0-SNAPSHOT.jar
Binary file not shown.
5 changes: 5 additions & 0 deletions target/maven-archiver/pom.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#Generated by Maven
#Mon Apr 20 12:03:16 MST 2020
groupId=nearsoft.academy
artifactId=big-data
version=1.0-SNAPSHOT
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
C:\Users\Ernesto\Desktop\big-data-exercises-master\src\test\java\nearsoft\academy\bigdata\recommendation\MovieRecommender.java
C:\Users\Ernesto\Desktop\big-data-exercises-master\src\test\java\nearsoft\academy\bigdata\recommendation\MovieRecommenderTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
<?xml version="1.0" encoding="UTF-8" ?>
<testsuite tests="1" failures="0" name="nearsoft.academy.bigdata.recommendation.MovieRecommenderTest" time="968.279" errors="0" skipped="0">
<properties>
<property name="java.runtime.name" value="OpenJDK Runtime Environment"/>
<property name="java.vm.version" value="13.0.2+8"/>
<property name="sun.boot.library.path" value="C:\Program Files\Java\bin"/>
<property name="maven.multiModuleProjectDirectory" value="C:\Users\Ernesto\Desktop\big-data-exercises-master"/>
<property name="java.vm.vendor" value="Oracle Corporation"/>
<property name="java.vendor.url" value="https://java.oracle.com/"/>
<property name="guice.disable.misplaced.annotation.check" value="true"/>
<property name="path.separator" value=";"/>
<property name="java.vm.name" value="OpenJDK 64-Bit Server VM"/>
<property name="sun.os.patch.level" value=""/>
<property name="user.script" value=""/>
<property name="user.country" value="MX"/>
<property name="sun.java.launcher" value="SUN_STANDARD"/>
<property name="java.vm.specification.name" value="Java Virtual Machine Specification"/>
<property name="user.dir" value="C:\Users\Ernesto\Desktop\big-data-exercises-master"/>
<property name="java.vm.compressedOopsMode" value="32-bit"/>
<property name="java.runtime.version" value="13.0.2+8"/>
<property name="os.arch" value="amd64"/>
<property name="java.io.tmpdir" value="C:\Users\Ernesto\AppData\Local\Temp\"/>
<property name="line.separator" value="
"/>
<property name="java.vm.specification.vendor" value="Oracle Corporation"/>
<property name="user.variant" value=""/>
<property name="os.name" value="Windows 8.1"/>
<property name="maven.ext.class.path" value="C:\Program Files\NetBeans-11.2\netbeans\java\maven-nblib\netbeans-eventspy.jar"/>
<property name="classworlds.conf" value="C:\Program Files\NetBeans-11.2\netbeans\java\maven\bin\m2.conf"/>
<property name="sun.jnu.encoding" value="Cp1252"/>
<property name="java.library.path" value="C:\Program Files\Java\bin;C:\WINDOWS\Sun\Java\bin;C:\WINDOWS\system32;C:\WINDOWS;C:\Program Files (x86)\Common Files\Oracle\Java\javapath;C:\Windows\system32;C:\Windows;C:\Windows\System32\Wbem;C:\Windows\System32\WindowsPowerShell\v1.0\;C:\Program Files (x86)\Java\jre1.8.0_131\bin;C:\Program Files\apache-maven-3.6.3-bin\bin;."/>
<property name="jdk.debug" value="release"/>
<property name="java.class.version" value="57.0"/>
<property name="java.specification.name" value="Java Platform API Specification"/>
<property name="sun.management.compiler" value="HotSpot 64-Bit Tiered Compilers"/>
<property name="os.version" value="6.3"/>
<property name="user.home" value="C:\Users\Ernesto"/>
<property name="user.timezone" value="America/Hermosillo"/>
<property name="file.encoding" value="UTF-8"/>
<property name="java.specification.version" value="13"/>
<property name="user.name" value="Ernesto"/>
<property name="java.class.path" value="C:\Program Files\NetBeans-11.2\netbeans\java\maven\boot\plexus-classworlds-2.5.2.jar"/>
<property name="java.vm.specification.version" value="13"/>
<property name="sun.arch.data.model" value="64"/>
<property name="sun.java.command" value="org.codehaus.plexus.classworlds.launcher.Launcher -Dmaven.ext.class.path=C:\Program Files\NetBeans-11.2\netbeans\java\maven-nblib\netbeans-eventspy.jar -Dfile.encoding=UTF-8 install"/>
<property name="java.home" value="C:\Program Files\Java"/>
<property name="user.language" value="es"/>
<property name="java.specification.vendor" value="Oracle Corporation"/>
<property name="java.vm.info" value="mixed mode, sharing"/>
<property name="java.version" value="13.0.2"/>
<property name="java.vendor" value="Oracle Corporation"/>
<property name="maven.home" value="C:\Program Files\NetBeans-11.2\netbeans\java\maven"/>
<property name="file.separator" value="\"/>
<property name="java.version.date" value="2020-01-14"/>
<property name="java.vendor.url.bug" value="https://bugreport.java.com/bugreport/"/>
<property name="sun.io.unicode.encoding" value="UnicodeLittle"/>
<property name="sun.cpu.endian" value="little"/>
<property name="sun.cpu.isalist" value="amd64"/>
</properties>
<testcase classname="nearsoft.academy.bigdata.recommendation.MovieRecommenderTest" name="testDataInfo" time="968.279"/>
</testsuite>
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
-------------------------------------------------------------------------------
Test set: nearsoft.academy.bigdata.recommendation.MovieRecommenderTest
-------------------------------------------------------------------------------
Tests run: 1, Failures: 0, Errors: 0, Skipped: 0, Time elapsed: 973.576 sec
Binary file not shown.
Binary file not shown.
Binary file not shown.