Dashboards in Python Using Dash — Creating a Data Table using Data from Reddit

Mine Reddit for NLP and Share with Dash

Eric Kleppen
Feb 18 · 13 min read
Image for post
Image for post
Photo by Carlos Muza on Unsplash

Collecting NLP Datasets

Image for post
Image for post
The Dashboard

Getting Started with Reddit API

Manage your API keys

Registering an App for Keys

Image for post
Image for post
reddit.com User Account button
Image for post
Image for post
Image for post
Image for post
Image for post
Image for post

File Structure and Config

Installation and Dependencies

# pip takes a space-separated package list; commas make the names invalid
pip install praw pandas dash dash-bootstrap-components
import praw
import pandas as pd
#from config import cid, csec, ua

Create a Reddit Instance

# build the Reddit client from the app credentials (cid, csec and ua are
# presumably the values stored in config.py -- confirm against your setup)
reddit = praw.Reddit(
    client_id=cid,
    client_secret=csec,
    user_agent=ua,
)

Identify Subreddits

Exploring Objects and Attributes

# single subreddit: the 5 newest posts
subreddit = reddit.subreddit('news').new(limit=5)
# multiple subreddits: join the names with '+' inside one string
# ('news' + 'datascience' would ask for a subreddit called 'newsdatascience')
subreddit = reddit.subreddit('news+datascience').top(limit=5)
Image for post
Image for post
import pprint

# pretty-print every attribute stored on the newest post in r/news
subreddit = reddit.subreddit('news').new(limit=1)
for post in subreddit:
    pprint.pprint(vars(post))
Image for post
Image for post
Example of the post object attributes
# print the attributes of interest for the first rising post in r/news
rising_posts = reddit.subreddit('news').rising(limit=1)
for post in rising_posts:
    for attribute in ('title', 'score', 'num_comments', 'selftext',
                      'created', 'total_awards_received'):
        print(getattr(post, attribute))
Image for post
Image for post
The printed attributes

Populate a DataFrame

# request the 100 newest posts from wallstreetbets
new_bets = reddit.subreddit('wallstreetbets').new(limit=100)
# keep only the attributes of interest, one row per post
posts = [
    [post.title, post.score, post.num_comments, post.selftext,
     post.created, post.pinned, post.total_awards_received]
    for post in new_bets
]
# turn the rows into a DataFrame with readable column names
posts = pd.DataFrame(
    posts,
    columns=['title', 'score', 'comments', 'post', 'created', 'pinned', 'total awards'],
)
# show the first 3 rows
posts.head(3)
Image for post
Image for post

Create NLP Features

# work on a copy so the raw posts DataFrame is left untouched
df = posts.copy()
# number of whitespace-separated words in each post body
df['words'] = df['post'].map(lambda text: len(text.split()))
# number of characters in each post body, ignoring spaces
df['chars'] = df['post'].map(lambda text: len(text.replace(" ", "")))
# words per character; the +1 keeps the denominator non-zero for empty posts
df['word density'] = df['words'].div(df['chars'].add(1)).round(3)
# number of distinct words in each post body
df['unique words'] = df['post'].map(lambda text: len(set(w for w in text.split())))
# share of the words that are distinct
df['unique density'] = df['unique words'].div(df['words']).round(3)
Image for post
Image for post
DataFrame with added Features

Create the Dash Files

DataTable Component in Dash

# minimal DataTable: one table column per DataFrame column, one row per record
layout = dash_table.DataTable(
    id='table',
    columns=[{"name": column, "id": column} for column in df.columns],
    data=df.to_dict('records'),
)
Image for post
Image for post

Data Table Styling

Sizing

Set Columns to Overflow

# same table, with data cells allowed to wrap text and grow in height
layout = dash_table.DataTable(
    id='table',
    style_data={'whiteSpace': 'normal', 'height': 'auto'},
    columns=[{"name": column, "id": column} for column in df.columns],
    data=df.to_dict('records'),
)
Image for post
Image for post
Data Table

Combine Sizing Properties

    # DataTable keyword arguments (fragment): per-column sizing for the
    # 'title' and 'post' columns
    , style_cell_conditional=[
{'if': {'column_id': 'title'},
'width': '200px'},
{'if': {'column_id': 'post'},
'width': '670px'
,'height':'auto'},
]
# style applied to every cell: overflowing text is hidden behind an ellipsis
,style_cell={
'overflow': 'hidden',
'textOverflow': 'ellipsis',
'maxWidth': '50px'
}
Image for post
Image for post

Set the Table Height

# DataTable keyword arguments (fragment): cap the table height and scroll
# vertically past it
style_table={
    'maxHeight': '700px',
    'overflowY': 'scroll'
},
# fixed_rows / fixed_columns settings for the header row and first data column
fixed_rows={'headers': True, 'data': 0},
fixed_columns={'headers': True, 'data': 1}

The DataTable

# the table with sizing, overflow and height settings combined
dash_table.DataTable(
    id='table',
    # fixed widths for the two text-heavy columns
    style_cell_conditional=[
        {'if': {'column_id': 'title'}, 'width': '200px'},
        {'if': {'column_id': 'post'}, 'width': '670px', 'height': 'auto'},
    ],
    # every cell: hide overflowing text behind an ellipsis
    style_cell={
        'overflow': 'hidden',
        'textOverflow': 'ellipsis',
        'maxWidth': '50px',
    },
    # cap the table height and scroll vertically past it
    style_table={
        'maxHeight': '700px',
        'overflowY': 'scroll',
    },
    columns=[{"name": column, "id": column} for column in df.columns],
    fixed_rows={'headers': True, 'data': 0},
    data=df.to_dict('records'),
)

Conditional Formatting

Image for post
Image for post
https://dash.plot.ly/datatable/filtering
# DataTable keyword argument (fragment): colour 'score' cells above 50 and
# 'comments' cells above 20 with a green background and white text
style_data_conditional=[
    {
        'if': {
            'column_id': 'score',
            'filter_query': '{score} gt 50'
        },
        'backgroundColor': '#3D9970',
        'color': 'white',
    },
    {
        'if': {
            'column_id': 'comments',
            'filter_query': '{comments} gt 20'
        },
        'backgroundColor': '#3D9970',
        'color': 'white',
    },
]
Image for post
Image for post
Conditional Formatting

Adding a Refresh Button and Input

# page layout (excerpt): a Refresh button, a heading, a text input pre-filled
# with 'wallstreetbets', then the DataTable
app.layout = html.Div([
html.P(html.Button('Refresh', id='refresh'))
,html.P(html.Div(html.H3('Enter Subreddit')))
,dcc.Input(id='input-1-state', type='text', value='wallstreetbets')
,dash_table.DataTable(
# the DataTable arguments and the closing brackets are elided in this excerpt
...
...
...
@app.callback(
    Output('table', 'data'),
    [Input('refresh', 'n_clicks')],
    [State('input-1-state', 'value')],
)
def update_data(n_clicks, subreddits):
    """Reload the table with the 100 newest posts of the requested subreddit.

    Args:
        n_clicks: Click count of the 'refresh' button; ``None`` until the
            button has been clicked.
        subreddits: Current text of the 'input-1-state' input. A missing or
            blank value falls back to 'wallstreetbets'.

    Returns:
        The refreshed rows as a list of record dicts for the table's
        ``data`` property.

    Raises:
        PreventUpdate: If the button has not been clicked yet, so the table
            keeps the data it was created with.
    """
    # nothing to refresh until the button has actually been clicked
    if n_clicks is None:
        raise PreventUpdate

    # a blank input would otherwise be sent to Reddit as an empty name
    subreddits = (subreddits or '').strip() or 'wallstreetbets'

    # collect the attributes of interest, one row per post
    new_posts = reddit.subreddit(subreddits).new(limit=100)
    rows = [
        [post.title, post.score, post.num_comments, post.selftext,
         post.created, post.pinned, post.total_awards_received]
        for post in new_posts
    ]
    dff = pd.DataFrame(
        rows,
        columns=['title', 'score', 'comments', 'post', 'created', 'pinned', 'total awards'],
    )

    # same text features as the initial table
    dff['words'] = dff['post'].apply(lambda x: len(x.split()))
    dff['chars'] = dff['post'].apply(lambda x: len(x.replace(" ", "")))
    dff['word density'] = (dff['words'] / (dff['chars'] + 1)).round(3)
    dff['unique words'] = dff['post'].apply(lambda x: len(set(x.split())))
    dff['unique density'] = (dff['unique words'] / dff['words']).round(3)

    return dff.to_dict('records')
Image for post
Image for post

The Complete Code

The Code

import dash
import dash_html_components as html
import dash_core_components as dcc
import dash_table
import pandas as pd
import praw
import pandas as pd
from dash.dependencies import Input, Output, State
from dash.exceptions import PreventUpdate
from config import cid, csec, uag
# Reddit client built from the credentials imported from config
reddit = praw.Reddit(
    client_id=cid,
    client_secret=csec,
    user_agent=uag,
)

# initial data: the 100 newest wallstreetbets posts, one row per post
new_bets = reddit.subreddit('wallstreetbets').new(limit=100)
posts = [
    [post.title, post.score, post.num_comments, post.selftext,
     post.created, post.pinned, post.total_awards_received]
    for post in new_bets
]
posts = pd.DataFrame(
    posts,
    columns=['title', 'score', 'comments', 'post', 'created', 'pinned', 'total awards'],
)

# text features computed on a copy of the raw posts
df = posts.copy()
df['words'] = df['post'].map(lambda text: len(text.split()))
df['chars'] = df['post'].map(lambda text: len(text.replace(" ", "")))
df['word density'] = df['words'].div(df['chars'].add(1)).round(3)
df['unique words'] = df['post'].map(lambda text: len(set(w for w in text.split())))
df['unique density'] = df['unique words'].div(df['words']).round(3)
# create the Dash application
app = dash.Dash(__name__)

# page layout: Refresh button, subreddit text input and the styled data table
app.layout = html.Div([
    html.P(html.Button('Refresh', id='refresh')),
    html.P(html.Div(html.H3('Enter Subreddit'))),
    dcc.Input(id='input-1-state', type='text', value='wallstreetbets'),
    dash_table.DataTable(
        id='table',
        # fixed widths for the two text-heavy columns
        style_cell_conditional=[
            {'if': {'column_id': 'title'}, 'width': '200px'},
            {'if': {'column_id': 'post'}, 'width': '670px', 'height': 'auto'},
        ],
        # every cell: hide overflowing text behind an ellipsis
        style_cell={
            'overflow': 'hidden',
            'textOverflow': 'ellipsis',
            'maxWidth': '50px',
        },
        # cap the table height and scroll vertically past it
        style_table={
            'maxHeight': '700px',
            'overflowY': 'scroll',
        },
        # colour cells by threshold: '#3D9970' (green) or '#B20000' (red)
        style_data_conditional=[
            {
                'if': {
                    'column_id': 'score',
                    'filter_query': '{score} gt 50',
                },
                'backgroundColor': '#3D9970',
                'color': 'white',
            },
            {
                'if': {
                    'column_id': 'score',
                    'filter_query': '{score} lt 10',
                },
                'backgroundColor': '#B20000',
                'color': 'white',
            },
            {
                'if': {
                    'column_id': 'comments',
                    'filter_query': '{comments} gt 45',
                },
                'backgroundColor': '#3D9970',
                'color': 'white',
            },
            {
                'if': {
                    'column_id': 'comments',
                    'filter_query': '{comments} lt 20',
                },
                'backgroundColor': '#B20000',
                'color': 'white',
            },
            {
                'if': {
                    'column_id': 'unique density',
                    'filter_query': '{unique density} lt 0.7',
                },
                'backgroundColor': '#3D9970',
                'color': 'white',
            },
        ],
        columns=[{"name": i, "id": i} for i in df.columns],
        fixed_rows={'headers': True, 'data': 0},
        data=df.to_dict('records'),
    ),
])

@app.callback(
    Output('table', 'data'),
    [Input('refresh', 'n_clicks')],
    [State('input-1-state', 'value')],
)
def update_data(n_clicks, subreddits):
    """Reload the table with the 100 newest posts of the requested subreddit.

    Args:
        n_clicks: Click count of the 'refresh' button; ``None`` until the
            button has been clicked.
        subreddits: Current text of the 'input-1-state' input. A missing or
            blank value falls back to 'wallstreetbets'.

    Returns:
        The refreshed rows as a list of record dicts for the table's
        ``data`` property.

    Raises:
        PreventUpdate: If the button has not been clicked yet, so the table
            keeps the data it was created with.
    """
    # nothing to refresh until the button has actually been clicked
    if n_clicks is None:
        raise PreventUpdate

    # a blank input would otherwise be sent to Reddit as an empty name
    subreddits = (subreddits or '').strip() or 'wallstreetbets'

    # collect the attributes of interest, one row per post
    new_posts = reddit.subreddit(subreddits).new(limit=100)
    rows = [
        [post.title, post.score, post.num_comments, post.selftext,
         post.created, post.pinned, post.total_awards_received]
        for post in new_posts
    ]
    dff = pd.DataFrame(
        rows,
        columns=['title', 'score', 'comments', 'post', 'created', 'pinned', 'total awards'],
    )

    # same text features as the initial table
    dff['words'] = dff['post'].apply(lambda x: len(x.split()))
    dff['chars'] = dff['post'].apply(lambda x: len(x.replace(" ", "")))
    dff['word density'] = (dff['words'] / (dff['chars'] + 1)).round(3)
    dff['unique words'] = dff['post'].apply(lambda x: len(set(x.split())))
    dff['unique density'] = (dff['unique words'] / dff['words']).round(3)

    return dff.to_dict('records')
# start the Dash server in debug mode on port 8050 when run as a script
if __name__ == '__main__':
    app.run_server(debug=True, port=8050)

The Startup

Medium's largest active publication, followed by +720K people. Follow to join our community.

Eric Kleppen

Written by

Software Product Analyst in Data Science. pythondashboards.com Top writer in Business www.linkedin.com/in/erickleppen01/

The Startup

Medium's largest active publication, followed by +720K people. Follow to join our community.

Eric Kleppen

Written by

Software Product Analyst in Data Science. pythondashboards.com Top writer in Business www.linkedin.com/in/erickleppen01/

The Startup

Medium's largest active publication, followed by +720K people. Follow to join our community.

Medium is an open platform where 170 million readers come to find insightful and dynamic thinking. Here, expert and undiscovered voices alike dive into the heart of any topic and bring new ideas to the surface. Learn more

Follow the writers, publications, and topics that matter to you, and you’ll see them on your homepage and in your inbox. Explore

If you have a story to tell, knowledge to share, or a perspective to offer — welcome home. It’s easy and free to post your thinking on any topic. Write on Medium

Get the Medium app

A button that says 'Download on the App Store', and if clicked it will lead you to the iOS App store
A button that says 'Get it on, Google Play', and if clicked it will lead you to the Google Play store