Analytics Vidhya
Published in

Analytics Vidhya

Analyzing Brand Sentiment With Robinhood, Gamestop & R

Using relevant tweets to understand how people feel about brands (Now you can put that on your resume!)

All the emotions, as demonstrated by eggs! — Photo by Tengyart from Unsplash
Figure 1 — Very negative sentiment overall for Robinhood after halting trading

Step 1: Load the Packages & Download the Data

# Search Twitter for #robinhood tweets (retweets filtered out) within the date
# window, then reduce the result to the three columns used in the analysis.
rh_tweets_day <- searchTwitter(
  searchString = "#robinhood -filter:retweets", # hashtag search, no retweets
  n = 4000,                                     # number of tweets to grab
  since = "2021-01-01",
  until = "2021-01-25"                          # the until day itself is not included
) %>%
  twListToDF() %>%                              # list output -> dataframe
  select(text, created, screenName)             # keep only the columns we need
# Split the `text` column of a tweets dataframe into individual words and
# remove the common words listed in `stop_words`.
#   x: a dataframe with a `text` column (one tweet per row)
# Returns the tokenised dataframe with a `word` column, minus the stop words.
find_words <- function(x) {
  x %>%
    unnest_tokens(word, text) %>%
    # Name the join key explicitly so the result cannot change if the two
    # tables ever share another column name, and no "Joining, by" message
    # is printed
    anti_join(stop_words, by = "word")
}
# Collect the dataframes from all the days and run find_words() on each one
df.list <- lapply(
  list(
    rh_tweets_jan_27, rh_tweets_jan_28, rh_tweets_jan_29, rh_tweets_jan_30,
    rh_tweets_jan_31, rh_tweets_feb_01, rh_tweets_feb_02, rh_tweets_feb_03
  ),
  find_words
)
# Step 1: Set your working directory and load the required packages in RStudio.
# Install a package only when it is missing, then attach it with library().
# (require() followed by install.packages() leaves a freshly installed package
# unattached, so the script would fail on its very first run.)
if (!requireNamespace("tidyverse", quietly = TRUE)) install.packages("tidyverse")
if (!requireNamespace("tidytext", quietly = TRUE)) install.packages("tidytext")
library(tidyverse)
library(tidytext)
# Read in the data from my Github
# https://github.com/danderson222/robinhood-sentiment-analysis
df <- readRDS("tweets.rds")

Step 2: Basic Sentiment Analysis

# Build a dataframe with three columns: the word, the sentiment it conveys
# (according to the "bing" lexicon) and the number of times it is used
df.bing <- df %>%
  # Explicit key: join on the word only, without the "Joining, by" message
  inner_join(get_sentiments("bing"), by = "word") %>%
  # TRUE rather than T, because T is an ordinary variable that can be reassigned
  count(word, sentiment, sort = TRUE) %>%
  ungroup()
# Bar chart of the most-used positive and negative words, one panel per sentiment
df.bing %>%
  group_by(sentiment) %>%
  # Pull out the top 15 words per sentiment, ranked explicitly by their count
  # (without wt, top_n() silently ranks by whatever column happens to be last)
  top_n(15, wt = n) %>%
  # Reorder top to bottom
  mutate(word = reorder(word, n)) %>%
  # Make the graph
  ggplot(aes(x = word, y = n, fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  # facet_wrap makes one graph per sentiment value in this case
  facet_wrap(~sentiment, scales = "free_y") +
  labs(
    title = "Sentiment around #Robinhood tweets",
    subtitle = "Other than free and rich (which could be deemed negative), most of the words tweeted were negative",
    y = "Number of times the word was tweeted",
    x = "Words"
  ) +
  coord_flip() +
  theme_bw() +
  theme(
    plot.title = element_text(face = "bold", size = 14),
    axis.title.x = element_text(face = "bold", size = 12),
    axis.title.y = element_text(face = "bold", size = 12)
  )
Figure 2 — Bing sentiment analysis showing the top words in all #robinhood tweets
# Another cool sentiment method is the "afinn" lexicon
# This assigns a positive or negative score to each word based on its apparent positivity or negativity
df.afin <- df %>%
  # Use the lexicon's real name ("afinn"); the misspelt "afin" is not a valid
  # lexicon name and can only work through partial argument matching
  inner_join(get_sentiments("afinn"), by = "word") %>%
  count(word, value, sort = TRUE) %>%
  ungroup()
# Here we create a new column to combine the value of each word with the number of times it was used
df.afin$combined_value <- df.afin$value * df.afin$n
# Merge the two sentiment dataframes on the word. Only the word and sentiment
# columns of df.bing are brought across, so its n column is not merged.
df2 <- merge(df.afin, df.bing[c("word", "sentiment")], by = "word")
# This chart shows the total score for each word: its positive/negative value
# multiplied by the number of times it was tweeted
df2 %>%
  group_by(sentiment) %>%
  # Keep the 15 highest-scoring words per sentiment. The weight must be given
  # explicitly: a bare top_n(15) ranks by the last column (sentiment), which
  # ties every row within a group and therefore filters nothing.
  top_n(15, wt = abs(combined_value)) %>%
  # We have to take the absolute value for the combined_value column given negative words receive a negative score
  mutate(word = reorder(word, abs(combined_value))) %>%
  # We only want the words that have a score above 100
  filter(abs(combined_value) > 100) %>%
  ggplot(aes(x = word, y = abs(combined_value), fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  # facet_wrap splits out the charts by the sentiment
  facet_wrap(~sentiment, scales = "free_y") +
  labs(
    title = "Sentiment around #Robinhood tweets",
    subtitle = "With so many high-scoring negative words, the amount of negativity contained in these \ntweets far outscores the positivity",
    # The bars show the combined score, not a raw tweet count
    y = "Combined score (word score x number of times tweeted)",
    x = "Words"
  ) +
  coord_flip() +
  theme_bw() +
  theme(
    plot.title = element_text(face = "bold", size = 14),
    axis.title.x = element_text(face = "bold", size = 12),
    axis.title.y = element_text(face = "bold", size = 12)
  )
Figure 2 — AFINN sentiment analysis depicting the sentiment-carrying words by score; swear words really win out here… sorry for the language, kids

Step 3: Charting Sentiment By Day

# Sentiment by day chart
# Keep only the words that carry a bing sentiment, then find the ten
# most-tweeted of those words for each day
df3 <- merge(df, df.bing, by = "word")
df3 <- df3 %>%
  group_by(created) %>%
  count(word, sort = TRUE) %>%
  # Rank explicitly by the daily count instead of "whatever column is last"
  top_n(10, wt = n) %>%
  # Remove grouping
  ungroup() %>%
  # Arrange by facet group & number of occurrences
  arrange(created, n) %>%
  # Add order column of row numbers
  mutate(order = row_number())
# Bring the sentiment label back; select df.bing's columns by name so the
# join does not depend on n being its third column
df3 <- inner_join(df3, df.bing[c("word", "sentiment")], by = "word")
# Change the date names to readable weekday labels. Every day is matched
# explicitly; a date outside this window becomes NA instead of being silently
# labelled as the last day.
df3 <- df3 %>%
  mutate(created = case_when(
    created == "2021-01-27" ~ "Wednesday Jan 27th",
    created == "2021-01-28" ~ "Thursday Jan 28th",
    created == "2021-01-29" ~ "Friday Jan 29th",
    created == "2021-01-30" ~ "Saturday Jan 30th",
    created == "2021-01-31" ~ "Sunday Jan 31st",
    created == "2021-02-01" ~ "Monday Feb 1st",
    created == "2021-02-02" ~ "Tuesday Feb 2nd",
    created == "2021-02-03" ~ "Wednesday Feb 3rd",
    TRUE ~ NA_character_
  ))
# Change the factor levels to make sure the plot appears properly. unique()
# returns the labels in row order (assumes df3 is still sorted by day), which
# saves typing them out again
days <- unique(df3$created)
days
# And turn the column into a factor, using every label found rather than a
# hard-coded first eight
df3$created <- factor(df3$created, levels = days)
# Daily chart: one panel per day, bars coloured by the sentiment of the word
ggplot(df3, aes(x = order, y = n, fill = sentiment)) +
  geom_col() +
  facet_wrap(~created, scales = "free_y") +
  # Show the words on the axis in place of the numeric order values
  scale_x_continuous(
    breaks = df3$order,
    labels = df3$word,
    expand = c(0, 0)
  ) +
  coord_flip() +
  xlab(NULL) +
  labs(
    x = " Top Words Tweeted",
    y = "Number of Times Word is Tweeted that Day within Sample of 4,000 Tweets",
    title = "Most Common Positive or Negative Words About \n#Robinhood on a Daily Basis",
    subtitle = "It is quite telling how much the negative words (especially the swear words) peaked \non Jan 28-30. By Feb 2-3, there is significantly less negative tweet traffic",
    fill = "Sentiment of Word"
  ) +
  theme_bw() +
  theme(
    plot.title = element_text(face = "bold", size = 16),
    axis.title.x = element_text(face = "bold", size = 12),
    axis.title.y = element_text(face = "bold", size = 12),
    legend.title = element_text(face = "bold", size = 12),
    legend.position = "bottom"
  )
Figure 3 — Sentiment by day shows that the negativity really dies down as time goes on, and didn’t really exist before the trading halt started on the Wednesday January 27th

Step 4: Percentage of Positive/ Negative Word Count

# Pair every tweeted word with its score, overall count and sentiment from df2
df3 <- merge(df, df2, by = "word") %>%
  select(word, created, value, n, sentiment)
# Separate dataframe holding the number of times each word was tweeted per day
x <- df %>%
  group_by(created) %>%
  count(word, sort = TRUE) %>%
  ungroup()
# Merge the two and give the overall and daily counts distinct names
df3 <- merge(df3, x, by = c("word", "created"))
names(df3) <- c("word", "created", "value", "total_n", "sentiment", "daily_n")
df3 <- unique(df3) # get rid of duplicate rows
# Combined daily total: the word's score times its number of tweets that day
df3$combined_daily <- with(df3, value * daily_n)
# I create another dataframe called plot.df for plotting my final chart:
# the summed daily score for each sentiment on each day
plot.df <- df3 %>%
  group_by(sentiment, created) %>%
  summarize(area = sum(combined_daily))
# Total absolute score per day, used as the denominator below
totals <- df3 %>%
  group_by(created) %>%
  summarize(total_score = sum(abs(combined_daily)))
plot.df <- merge(plot.df, totals, by = "created")
# Calculate the percentage to be graphed
plot.df$percentage <- plot.df$area / plot.df$total_score
# Check the type of the date column before converting it with as.Date() below
class(plot.df$created)
# Graph it with an area chart. The plot is stored in a variable because
# ggsave() cannot be added to a plot with `+` (recent ggplot2 versions raise
# an error for that); it is saved in a separate call instead.
sentiment_share_plot <- plot.df %>%
  ggplot(aes(x = as.Date(created), y = percentage, color = sentiment, fill = sentiment)) +
  # I like the area plot showing the percentage totals for positive and negative
  geom_area(stat = "identity", alpha = 0.4) +
  geom_point(size = 1) +
  theme_bw() +
  labs(
    x = "Day",
    y = "Percentage of Negative/ Positive Tweets",
    title = "How Negative Were #robinhood Tweets?",
    subtitle = "While on Jan 27th it was about 50/50, tweets got a lot more \nnegative after the Robinhood app banned trading on the 28th, \nalthough it slightly recovered by Feb 3rd"
  ) +
  theme(
    plot.title = element_text(face = "bold", size = 14, hjust = 0.5),
    axis.title.x = element_text(face = "bold", size = 12),
    axis.title.y = element_text(face = "bold", size = 12),
    legend.position = "none"
  )
# Show the chart
sentiment_share_plot
# Save the plot so we can add some icons to it; create the folder first so
# the save does not fail when it is missing
dir.create("output", showWarnings = FALSE)
ggsave(
  filename = "output/PositiveNegativeBreakdown.png",
  plot = sentiment_share_plot,
  width = 5, height = 4, dpi = 300
)
Figure 4 — The basic positive/ negative area chart

Step 5: Add Some Icons

# Load the magick library (installing it first when missing) and call back
# your created plot. library() is used after the install check so the package
# is attached even on the run that installs it.
if (!requireNamespace("magick", quietly = TRUE)) install.packages("magick")
library(magick)
plot <- image_read("output/PositiveNegativeBreakdown.png")
# And bring in your images (read straight from their URLs)
nice_img <- image_read("https://cdn2.iconfinder.com/data/icons/primitive-gradient/512/xxx014-512.png")
neg_img <- image_read("https://cdn1.iconfinder.com/data/icons/modifiers-essential-glyph-1/48/Mod_Essentials-02-512.png")
wsb_logo <- image_read("https://i.pinimg.com/originals/29/24/89/292489e7d0bf8ce7d5ffd81be62d0800.png")
twt_logo <- image_read("https://assets.stickpng.com/images/580b57fcd9996e24bc43c53e.png")
# Scale down the icons and logos
# This is the cool part because you can do a lot to the image/logo before adding it
nice_img <- nice_img %>%
  image_scale("100")
neg_img <- neg_img %>%
  image_scale("150")
wsb_logo <- wsb_logo %>%
  image_scale("150")
twt_logo <- twt_logo %>%
  image_scale("150")
# Stack them on top of the plot, each at its own offset
final_plot <- plot %>%
  image_composite(nice_img, offset = "+850+450") %>%
  image_composite(neg_img, offset = "+825+650") %>%
  image_composite(wsb_logo, offset = "+100+220") %>%
  image_composite(twt_logo, offset = "+1350+230")
final_plot
# Write the decorated plot to a new file; the plot without logos is left
# untouched
image_write(final_plot, "output/FinalPlotWithLogos.png")
Figure 5 — Adding some icons for fun to make the graph really pop!

--

--

Get the Medium app

A button that says 'Download on the App Store', and if clicked it will lead you to the iOS App store
A button that says 'Get it on, Google Play', and if clicked it will lead you to the Google Play store
Dylan Anderson

Data Strategy Lead at Redkite. Code in R & blog about politics using data. Connect on LinkedIn, Twitter or at my blog policyinnumbers.com