Building Recommendation Algorithms for Social Media Platforms

4 min read · Apr 16, 2023

Here’s an example of a simple YouTube recommendation algorithm in Python based on user interest using content-based filtering:

# Import necessary libraries
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

# Load the video catalogue
videos = pd.read_csv('videos.csv')

# Build the TF-IDF representation of every video description.
# Missing descriptions are replaced with empty strings so the vectorizer
# receives text for every row.
descriptions = videos['description'].fillna('')
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(descriptions)

# Video-to-video similarity matrix. With a single argument, linear_kernel
# compares the matrix against itself; because TfidfVectorizer L2-normalises
# rows by default, these dot products are cosine similarities.
cosine_sim = linear_kernel(tfidf_matrix)

# Get the top recommendations for a user based on their interests
def recommend_videos(user_interests, num_recommendations=10):
    """Return the videos whose descriptions best match ``user_interests``.

    Args:
        user_interests: Free-text description of what the user likes. It is
            vectorised with the already-fitted module-level ``tfidf``.
        num_recommendations: Maximum number of videos to return.

    Returns:
        A DataFrame with the ``video_id`` and ``title`` columns of the
        best-matching rows of ``videos``, most similar first.
    """
    # Project the query into the same TF-IDF space as the video descriptions.
    query_vector = tfidf.transform([user_interests])

    # One similarity score per video (the kernel result has a single row).
    scores = linear_kernel(query_vector, tfidf_matrix).ravel()

    # Ascending argsort, reversed, then truncated to the requested count.
    ranked_positions = scores.argsort()[::-1]
    top_positions = ranked_positions[:num_recommendations]

    return videos[['video_id', 'title']].iloc[top_positions]

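In this example, we first load the videos data into a dataframe and create a TF-IDF matrix for the video descriptions. We then compute the cosine similarity matrix between the videos based on their descriptions; because `TfidfVectorizer` L2-normalizes each row by default, the plain dot product returned by `linear_kernel` is exactly the cosine similarity. (This video-to-video matrix is handy for "more like this" lookups, but the recommendation function below does not need it.) Finally, we define a function to recommend videos for a given user based on their interests, which creates a TF-IDF vector for the user interests, computes the cosine similarities between the user interests vector and the video descriptions, and returns the top recommendations based on the most similar videos.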

This is just a basic example, and there are many ways to modify and improve the recommendation algorithm for different problems.

Instagram Recommendation Algorithm

# Import necessary libraries
import pandas as pd
import numpy as np
from scipy.sparse.linalg import svds

# Load the data
likes = pd.read_csv('likes.csv')
users = pd.read_csv('users.csv')
posts = pd.read_csv('posts.csv')

# Merge the data (inner joins: only likes whose user and post are both known)
data = pd.merge(pd.merge(likes, users, on='user_id'), posts, on='post_id')

# Create a user-post rating matrix: one row per user, one column per post.
# User/post pairs with no recorded like are filled with 0.
user_post_ratings = data.pivot_table(index='user_id', columns='post_id', values='like').fillna(0)

# Perform matrix factorization using Singular Value Decomposition (SVD).
# svds expects a floating-point ndarray (or sparse matrix / LinearOperator),
# not a DataFrame, so convert explicitly.
ratings_matrix = user_post_ratings.to_numpy(dtype=float)

# svds requires 1 <= k < min(n_users, n_posts); cap the 50 latent factors so
# small datasets do not fail. (A matrix with a single row or column still
# cannot be factorised and will raise ValueError.)
num_factors = min(50, min(ratings_matrix.shape) - 1)
U, sigma, Vt = svds(ratings_matrix, k=num_factors)
sigma = np.diag(sigma)

# Predict ratings for unseen posts by multiplying the factors back together
all_user_ratings = np.dot(np.dot(U, sigma), Vt)
pred_user_ratings = pd.DataFrame(all_user_ratings, columns=user_post_ratings.columns, index=user_post_ratings.index)

# Get the top recommendations for a user
def recommend_posts(user_id, num_recommendations=10):
    """Recommend posts the user has not liked yet, ranked by popularity-weighted rating.

    Each candidate post is scored as ``predicted rating * view_count`` and the
    highest-scoring posts are returned.

    Args:
        user_id: Label of the user's row in ``pred_user_ratings``.
        num_recommendations: Maximum number of posts to return.

    Returns:
        A DataFrame with columns ``post_id``, ``title``, ``view_count`` and
        ``score``, sorted by ``score`` in descending order.

    Raises:
        KeyError: If ``user_id`` has no row in ``pred_user_ratings``.
    """
    user_ratings = pred_user_ratings.loc[user_id]

    # Exclude posts the user already liked. The comparison must be against the
    # post IDs themselves: ``x in series`` would test the Series index instead.
    liked_post_ids = likes.loc[likes['user_id'] == user_id, 'post_id']
    unseen_ratings = user_ratings[~user_ratings.index.isin(liked_post_ids)]

    # Look up title and view count for every candidate in one indexed pass.
    # If a post_id appears more than once in ``posts``, the first row wins.
    post_details = (
        posts.drop_duplicates(subset='post_id')
        .set_index('post_id')
        .loc[unseen_ratings.index, ['title', 'view_count']]
    )
    view_counts = post_details['view_count'].to_numpy()

    # Build the result in one go; DataFrame.append and Series.iteritems were
    # removed in pandas 2.0, and row-by-row appends are quadratic anyway.
    recommendations = pd.DataFrame({
        'post_id': unseen_ratings.index.to_numpy(),
        'title': post_details['title'].to_numpy(),
        'view_count': view_counts,
        'score': unseen_ratings.to_numpy() * view_counts,
    })
    return recommendations.sort_values(by='score', ascending=False).head(num_recommendations)

In this example, we load the likes, users, and posts data into dataframes, merge them, and create a user-post rating matrix. We then perform matrix factorization using SVD to reduce the dimensionality of the data and predict ratings for unseen posts. Finally, we define a function to recommend posts for a given user, which sorts the predicted ratings by score (rating times view count) and returns the top recommendations.

Facebook Recommendation Algorithm

# Import necessary libraries
import pandas as pd
import numpy as np
from scipy.sparse.linalg import svds

# Load the data
reactions = pd.read_csv('reactions.csv')
users = pd.read_csv('users.csv')
posts = pd.read_csv('posts.csv')

# Merge the data (inner joins: only reactions whose user and post are both known)
data = pd.merge(pd.merge(reactions, users, on='user_id'), posts, on='post_id')

# Create a user-post rating matrix: one row per user, one column per post.
# User/post pairs with no recorded reaction are filled with 0.
user_post_ratings = data.pivot_table(index='user_id', columns='post_id', values='reaction').fillna(0)

# Perform matrix factorization using Singular Value Decomposition (SVD).
# svds expects a floating-point ndarray (or sparse matrix / LinearOperator),
# not a DataFrame, so convert explicitly.
ratings_matrix = user_post_ratings.to_numpy(dtype=float)

# svds requires 1 <= k < min(n_users, n_posts); cap the 50 latent factors so
# small datasets do not fail. (A matrix with a single row or column still
# cannot be factorised and will raise ValueError.)
num_factors = min(50, min(ratings_matrix.shape) - 1)
U, sigma, Vt = svds(ratings_matrix, k=num_factors)
sigma = np.diag(sigma)

# Predict ratings for unseen posts by multiplying the factors back together
all_user_ratings = np.dot(np.dot(U, sigma), Vt)
pred_user_ratings = pd.DataFrame(all_user_ratings, columns=user_post_ratings.columns, index=user_post_ratings.index)

# Get the top recommendations for a user
def recommend_posts(user_id, num_recommendations=10):
    """Recommend posts the user has not reacted to yet, ranked by popularity-weighted rating.

    Each candidate post is scored as ``predicted rating * view_count`` and the
    highest-scoring posts are returned.

    Args:
        user_id: Label of the user's row in ``pred_user_ratings``.
        num_recommendations: Maximum number of posts to return.

    Returns:
        A DataFrame with columns ``post_id``, ``title``, ``view_count`` and
        ``score``, sorted by ``score`` in descending order.

    Raises:
        KeyError: If ``user_id`` has no row in ``pred_user_ratings``.
    """
    user_ratings = pred_user_ratings.loc[user_id]

    # The user's history is every post they reacted to. It is taken straight
    # from the raw reactions: the SVD-reconstructed ratings are floats and
    # cannot reliably be compared against an exact value such as 1.
    user_history = reactions.loc[reactions['user_id'] == user_id, 'post_id']

    # Filter on the post IDs themselves: ``x in series`` would test the
    # Series index rather than its values.
    unseen_ratings = user_ratings[~user_ratings.index.isin(user_history)]

    # Look up title and view count for every candidate in one indexed pass.
    # If a post_id appears more than once in ``posts``, the first row wins.
    post_details = (
        posts.drop_duplicates(subset='post_id')
        .set_index('post_id')
        .loc[unseen_ratings.index, ['title', 'view_count']]
    )
    view_counts = post_details['view_count'].to_numpy()

    # Build the result in one go; DataFrame.append and Series.iteritems were
    # removed in pandas 2.0, and row-by-row appends are quadratic anyway.
    recommendations = pd.DataFrame({
        'post_id': unseen_ratings.index.to_numpy(),
        'title': post_details['title'].to_numpy(),
        'view_count': view_counts,
        'score': unseen_ratings.to_numpy() * view_counts,
    })
    return recommendations.sort_values(by='score', ascending=False).head(num_recommendations)

In this example, we load the reactions, users, and posts data into dataframes, merge them, and create a user-post rating matrix. We then perform matrix factorization using SVD to reduce the dimensionality of the data and predict ratings for unseen posts. Finally, we define a function to recommend posts for a given user, which sorts the predicted ratings by score (rating times view count) and returns the top recommendations, excluding posts that the user has already reacted to.

This is just a basic example, and there are many ways to modify and improve the recommendation algorithm for different problems.

Building Recommendation Algorithms for Social Media Platforms

Instagram Recommendation Algorithm

Facebook Recommendation Algorithm

Written by Mahad Ahmad