Sentiment Analysis — WordCloud

2 min read · Oct 2, 2023

This time I will analyze sentiment using a word cloud. A word cloud is a visual representation of word frequency: the more often a term appears in the text being analysed, the larger that word appears in the generated image.

It is a data visualization technique used for representing text data in which the size of each word indicates its frequency or importance. Significant textual data points can be highlighted using a word cloud. Word clouds are widely used for analyzing data from social network websites.

To generate a word cloud in Python, the modules needed are matplotlib, pandas and wordcloud; this walkthrough also uses numpy, seaborn, nltk, tqdm and google-play-scraper. Packages that may be missing are installed with pip in the cells below.

Import Libraries

import pandas as pd
import numpy as np
import wordcloud
!pip install google-play-scraper

from google_play_scraper import Sort, reviews
from google_play_scraper import app

import matplotlib.pyplot as plt
import seaborn as sns
plt.style.use('ggplot')

Collecting Data

# Request up to 10,000 reviews for the app id 'com.shopee.id', with language
# and country both set to 'id', sorted by relevance and with no score filter.
# The second return value is kept only because the call returns a pair; it is
# not used afterwards (presumably a paging token for fetching more reviews).
result, continuation_token = reviews(
    'com.shopee.id',
    lang='id',
    country='id',
    sort=Sort.MOST_RELEVANT,
    count=10000,
    filter_score_with=None,
)

# Build the review DataFrame: one row per review record, one column per field.
# The records are passed to the constructor directly; wrapping them in a NumPy
# object array and then popping/joining a temporary 'review' column produces
# the same frame with extra steps.
dfs = pd.DataFrame(result)
dfs.head()

Analysis Using VADER’s SentimentIntensityAnalyzer

from nltk.sentiment import SentimentIntensityAnalyzer
from tqdm.notebook import tqdm
import nltk
# The analyzer needs the 'vader_lexicon' resource, so download it first.
nltk.downloader.download('vader_lexicon')
sia = SentimentIntensityAnalyzer()
# NOTE(review): the reviews are requested with lang='id' earlier in this file,
# while VADER's lexicon is presumably English-oriented -- confirm the scores
# are meaningful for this text.

# Run the polarity score on the entire dataset.
# `res` maps each reviewId to the score dict returned for that review's text.
# Only the two needed columns are iterated: iterrows() builds a full Series
# for every row and its index value was never used.
res = {
    review_id: sia.polarity_scores(text)
    for review_id, text in tqdm(zip(dfs['reviewId'], dfs['content']), total=len(dfs))
}

# `res` is keyed by reviewId, so the raw frame has one column per review;
# transpose it to get one row per review with the score names as columns.
vaders = pd.DataFrame(res).T
# Move the reviewId index into a regular column so it can serve as merge key.
# (A bare `vaders.reset_index()` whose result is discarded does nothing, so
# only the assigning call is kept.)
vaders = vaders.reset_index().rename(columns={'index': 'reviewId'})
# Attach the original review fields to the scores, matching on reviewId.
vaders = vaders.merge(dfs, how='left', on='reviewId')

# Label each review from its compound score: exactly zero is 'Neutral',
# below zero is 'Negative', and everything else is 'Positive'.
compound_score = vaders['compound']
vaders['sentiment'] = np.select(
    [compound_score == 0, compound_score < 0],
    ['Neutral', 'Negative'],
    default='Positive',
)

# Count the reviews per sentiment label, order the labels alphabetically,
# and draw the counts as a bar chart.
sentiment_counts = vaders['sentiment'].value_counts()
sentiment_counts = sentiment_counts.sort_index()
ax = sentiment_counts.plot(
    kind='bar',
    title='Shopee Review Sentiment',
    figsize=(5, 5),
)
ax.set_xlabel('Review Sentiment')
plt.show()

Word Cloud

# Normalise the review text: coerce every value to str, then lower-case it.
vaders['content'] = vaders['content'].astype(str).str.lower()

!pip install wordcloud

import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from wordcloud import WordCloud
import re
import nltk
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('omw-1.4')

# Shared WordNetLemmatizer instance used by text_transformation.
lm = WordNetLemmatizer()


def text_transformation(vaders_col, language='english'):
    """Clean, filter and lemmatize every text in ``vaders_col``.

    Each item is converted with ``str()``, every character that is not an
    ASCII letter is replaced by a space, the text is lower-cased and split
    on whitespace, stop words are dropped, the remaining tokens are passed
    through the module-level lemmatizer ``lm``, and the result is re-joined
    with single spaces.

    Args:
        vaders_col: Iterable of review texts (for example a DataFrame
            column).
        language: Name of the NLTK stop-word list to filter with. Defaults
            to ``'english'``, which is what this function always used.

    Returns:
        A list of cleaned strings, one per input item, in input order.
    """
    # Build the stop-word set once per call; rebuilding it for every token
    # repeats the same corpus lookup for no benefit.
    # NOTE(review): the reviews are requested with lang='id' earlier in this
    # file, so language='indonesian' may be the better choice -- confirm that
    # list exists in the installed NLTK data. WordNet lemmatization is
    # presumably English-oriented as well.
    stop_words = set(stopwords.words(language))

    corpus = []
    for item in vaders_col:
        tokens = re.sub('[^a-zA-Z]', ' ', str(item)).lower().split()
        kept = (lm.lemmatize(token) for token in tokens if token not in stop_words)
        corpus.append(' '.join(kept))
    return corpus


corpus = text_transformation(vaders['content'])

# Combine all cleaned reviews into one text, separated by spaces.
# Each entry of `corpus` is already a space-separated string. Iterating it
# yields single characters, and " ".join() of one character returns it
# unchanged, so concatenating them glues consecutive reviews together with no
# separator and merges the last word of one review with the first of the next.
word_cloud = " ".join(corpus)

# NOTE: this rebinds the name `wordcloud`, which the import cell at the top of
# the file bound to the module; the name is kept so later cells still see it.
wordcloud = WordCloud(
    width=1000,
    height=500,
    background_color='white',
    min_font_size=10,
).generate(word_cloud)

plt.imshow(wordcloud)
# Hide the axes so ticks and the 'ggplot' style grid are not drawn over the image.
plt.axis('off')
plt.show()

Sentiment Analysis — WordCloud

Written by Amanatulamriyah