# Compute property recommendations: A collaborative filtering approach

Aug 23, 2014 · 10 min read

## Computing recommendations

recommendations = {}Property.all.each do |a|  users_for_a = [...]    Property.all.each do |b|    users_for_b = [...]    users_for_a_and_b = users_for_a & users_for_b       recommendations[a.id] ||= {}    recommendations[a.id][b.id] = users_for_a_and_b.size   endend

## Exploiting parallelism

(u_a, p_h), (u_b, p_k), ....

u_a: [ p_a1, p_a2, .... p_an ]
u_b: [ p_b1, p_b2, .... p_bm ]
...

(p_a1, p_a2)
...
(p_a1, p_an)
(p_a2, p_a1)
...
(p_a2, p_an)
(p_b1, p_b2)
...

(p_h, p_k, c_hk)
...
(p_i, p_j, c_ij)
...

p_i: (p_j, c_ij), (p_k, c_ik),…
...
p_v: (p_z, c_vz), (p_w, c_vw),…
...

## Appendix

/*
 * Property recommender: counts, for every pair of properties, how many users
 * hit both, and emits for each property the other properties ranked by that count.
 *
 * Parameters:
 *   $htjar  - jar to register (ht.udf.Permutations is presumably provided by it)
 *   $input  - comma-separated log of (user_id, property_id) hits
 *   $output - location where the ranked result is stored (removed first if present)
 */
REGISTER '$htjar';

pre1 = LOAD '$input' USING PigStorage(',') AS (user_id: int, property_id: int);

/* Multiple hits by the same user on the same property count only once */
pre2 = DISTINCT pre1;

/* Collect the hits for each user */
r2 = GROUP pre2 BY user_id;

/* Clean the bag: keep only the property ids of each user */
r3 = FOREACH r2 GENERATE pre2.property_id;

/* Generate all the possible <p_i, p_j> permutations */
r4 = FOREACH r3 GENERATE ht.udf.Permutations($0);

/* Clean the bag: one row per <p_i, p_j> pair */
r5 = FOREACH r4 GENERATE FLATTEN($0);

/* Group the occurrences of each <p_i, p_j> tuple */
r6 = GROUP r5 BY ($0, $1);

/* Count the occurrences of each <p_i, p_j> couple and generate the triple <p_i, p_j, count> */
r7 = FOREACH r6 GENERATE $0.$0, $0.$1, COUNT($1);

/* Group the triples <p_i, p_j, count> using the first dimension,
   obtaining <p_i, (<p_i, p_j, count>, ... <p_i, p_k, count>)> */
r8 = GROUP r7 BY $0;

/* Clean and order the collected triples <p_i, (<p_j, max_count>, ... <p_k, min_count>)> */
r9 = FOREACH r8 {
    ordered = ORDER r7 BY $2 DESC;
    ordered_prj = FOREACH ordered GENERATE $1, $2;
    GENERATE $0, ordered_prj;
};

/* Store the result */
rmf $output;
STORE r9 INTO '$output';
/**
 * Runs the property recommender Pig script on a small fixture log and checks
 * two aliases: {@code pre1} (the raw loaded hits, duplicates included) and
 * {@code r9} (for each property, the co-visited properties with their counts).
 *
 * @throws Exception if the script cannot be loaded or executed
 */
@Test
public void run_on_AR() throws Exception {
    // Values for the $input, $output and $htjar parameters used by the script.
    String[] args = {
        "input=src/test/resources/property_recommender_test/log_on_ar.txt",
        "output=src/test/resources/property_recommender_test/out",
        "htjar=target/PropertyRecommender-0.0.1-SNAPSHOT.jar"
    };

    PigTest pigTest = new PigTest("src/main/resources/property_recommender_on_AR.pig", args);

    // Raw input as loaded: the repeated (111,1101) hit is still present at this stage.
    pigTest.assertOutput("pre1", new String[] {
        "(111,1101)",
        "(111,1101)",
        "(111,1102)",
        "(111,1103)",
        "(222,2201)",
        "(222,2202)",
        "(222,1101)",
        "(333,3301)",
        "(333,2201)",
        "(333,1101)"
    });

    // Final ranking per property, highest count first.
    // NOTE(review): entries with equal counts are expected in this exact order;
    // confirm the script's ordering of ties is stable enough for that.
    pigTest.assertOutput("r9", new String[] {
        "((1101),{((2201),2),((1103),1),((1102),1),((2202),1),((3301),1)})",
        "((1102),{((1101),1),((1103),1)})",
        "((1103),{((1101),1),((1102),1)})",
        "((2201),{((1101),2),((2202),1),((3301),1)})",
        "((2202),{((1101),1),((2201),1)})",
        "((3301),{((1101),1),((2201),1)})"
    });
}

Written by

## Alfredo Motta

#### Full stack Software Engineer with expertise in Web, ML and Startups. Customer before tech. Data before intuition. Enjoy the learnings over being too serious.

Welcome to a place where words matter. On Medium, smart voices and original ideas take center stage - with no ads in sight. Watch
Follow all the topics you care about, and we’ll deliver the best stories for you to your homepage and inbox. Explore
Get unlimited access to the best stories on Medium — and support writers while you’re at it. Just \$5/month. Upgrade