Customer Churn: Understanding Customer Attrition
GitHub LinkedIn Medium Portfolio Substack
Customer churn (or customer attrition) refers to the loss of customers or subscribers for any reason at all. Businesses measure and track churn as a percentage of lost customers compared to the total number of customers over a given period. This metric is usually tracked monthly and reported at the end of the month.
Let us take a deep dive into customer churn using a real-world dataset.
For this experiment, we will be using the Telco Customer Churn Dataset: https://www.kaggle.com/datasets/blastchar/telco-customer-churn
Importing Libraries
import os
from pathlib import Path, PurePath
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
accuracy_score,
precision_score,
recall_score,
f1_score,
classification_report,
confusion_matrix
)
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
import warnings
warnings.filterwarnings('ignore')
Loading Dataset
# Load the Telco customer-churn CSV into a DataFrame.
dataset_path = r"datas/WA_Fn-UseC_-Telco-Customer-Churn.csv"
df = pd.read_csv(dataset_path)
# Bare expression: evaluates to the (rows, columns) tuple of the loaded frame.
df.shape
Data Visualization
# Draw one pie chart per grid cell on an n_rows x n_cols subplot figure.
# NOTE(review): n_rows, n_cols, titles, specs, labels and values are defined
# outside this excerpt; labels[k] / values[k] are presumably the category
# names and counts for the k-th chart -- confirm against the defining cell.
fig = make_subplots(rows=n_rows, cols=n_cols, subplot_titles=titles, specs=specs)

for i in range(1, n_rows + 1):  # Rows
    for j in range(1, n_cols + 1):  # Columns
        # Row-major position of cell (i, j). The row stride has to be the
        # grid width; a hard-coded 2 is only correct for a two-column grid.
        idx = (i - 1) * n_cols + j - 1
        label = labels[idx]
        value = values[idx]
        fig.add_trace(
            go.Pie(labels=label, values=value, hole=0.3, name=f'Pie {idx + 1}'),
            row=i,
            col=j
        )

fig.update_layout(
    title=dict(text="<b>Distribution Charts</b>", x=0.5, xanchor='center'),
    showlegend=True,
    height=3200,
    width=1600
)
# update_traces is applied to the traces added above, so the hole size set
# here (0.2) replaces the 0.3 passed to go.Pie.
fig.update_traces(hole=.2, hoverinfo="label+percent+name", textfont_size=16)
fig.show()
Churn Pie Distribution
# Pie chart of how many rows fall into each Churn value.
churn_counts = df['Churn'].value_counts()
labels = list(churn_counts.index)
values = churn_counts

title = "<b>Churn Distribution</b>"
fig = go.Figure(data=[go.Pie(labels=labels, values=values, hole=.3)])
fig.update_layout(
    title=dict(text=title, x=0.5, xanchor='center'),
    showlegend=True,
)
# Applied after construction, so this hole size (0.2) replaces the 0.3 above.
fig.update_traces(hole=.2, hoverinfo="all", textfont_size=16)
fig.show()
Bar Plot
def plot_hist(column):
    """Show a histogram of ``Churn`` counts, colour-split by ``column``.

    Reads the module-level DataFrame ``df``. ``column`` is handed to plotly
    express as the ``color`` argument and interpolated into the chart title.
    """
    chart_title = f"<b>{column} distribution w.r.t. Churn</b>"
    histogram = px.histogram(df, x="Churn", color=column, title=chart_title)
    histogram.update_layout(width=700, height=500, bargap=0.1)
    histogram.update_traces(hoverinfo="all", textfont_size=16)
    histogram.show()


# `col` is defined outside this excerpt -- presumably the list of column
# names to chart; confirm against the defining cell.
for feature in col:
    plot_hist(feature)
Distribution of total charges by churn
# Overlaid kernel-density estimates of TotalCharges, one curve per Churn value.
# NOTE(review): assumes df.TotalCharges has already been converted to a numeric
# dtype by a step outside this excerpt -- confirm.
# NOTE(review): `shade=` is deprecated in favour of `fill=` in newer seaborn
# releases; it is kept because the targeted seaborn version is not visible here.
ax = sns.kdeplot(df.TotalCharges[(df["Churn"] == 'No')],
                 color="Gold", shade=True)
ax = sns.kdeplot(df.TotalCharges[(df["Churn"] == 'Yes')],
                 ax=ax, color="Green", shade=True)
# Legend entries are listed in plotting order: the 'No' curve first, then 'Yes'.
# (The first label previously contained a stray digit: "Not Chu0rn".)
ax.legend(["Not Churn", "Churn"], loc='upper right')
ax.set_ylabel('Density')
ax.set_xlabel('Total Charges')
ax.set_title('Distribution of total charges by churn', weight='bold')
plt.show()
Distribution of monthly charges by churn
# Overlaid kernel-density estimates of MonthlyCharges, one curve per Churn value.
sns.set_context("paper", font_scale=1.1)

monthly_charges = df.MonthlyCharges
stayed = df["Churn"] == 'No'
churned = df["Churn"] == 'Yes'

ax = sns.kdeplot(monthly_charges[stayed], color="Red", shade=True)
ax = sns.kdeplot(monthly_charges[churned], ax=ax, color="Blue", shade=True)

# Legend entries are listed in plotting order: the 'No' curve first, then 'Yes'.
ax.legend(["Not Churn", "Churn"], loc='upper right')
ax.set_xlabel('Monthly Charges')
ax.set_ylabel('Density')
ax.set_title('Distribution of monthly charges by churn', weight='bold')
plt.show()
Tenure vs Churn
# Box plot of tenure for each Churn value.
fig = px.box(df, x='Churn', y='tenure')

fig.update_xaxes(title_text='Churn', row=1, col=1)
fig.update_yaxes(title_text='Tenure (Months)', row=1, col=1)

# Key order is kept as in the original call (title_font before title).
layout_options = dict(
    autosize=True,
    width=750,
    height=600,
    title_font=dict(size=25, family='Courier'),
    title='<b>Tenure vs Churn</b>',
)
fig.update_layout(**layout_options)
fig.show()
Distplot of Tenure, Monthly Charges, Total Charges
# Distribution plots for three columns, placed in the first three cells of a
# 2x2 subplot grid (the fourth cell is left empty).
plt.figure(figsize=(12, 8))

panels = (
    ('tenure', 'r'),
    ('MonthlyCharges', 'b'),
    ('TotalCharges', 'y'),
)
for position, (column, colour) in enumerate(panels, start=1):
    plt.subplot(2, 2, position)
    sns.distplot(df[column], color=colour)

plt.show()
def plot_w_r_t_churn(col, title):
    """Draw a stacked bar chart and label every bar segment with its height.

    Args:
        col: Object whose ``.plot(kind='bar', ...)`` returns the axes drawn
            on (presumably a pandas DataFrame of percentages with one column
            per Churn value -- verify against the callers).
        title: Text inserted into the chart title ``'Churn by {title} Level'``.
    """
    palette = ['#4D3425', '#E4512B']
    ax = col.plot(
        kind='bar',
        width=0.2,
        stacked=True,
        rot=0,
        figsize=(8, 6),
        color=palette,
    )
    ax.yaxis.set_major_formatter(mtick.PercentFormatter())
    ax.legend(loc='center', prop={'size': 14}, title='Churn')
    ax.set_ylabel('% Customers')
    ax.set_title(f'Churn by {title} Level', size=14)

    # Add the data labels on the stacked bar chart: each segment's height,
    # formatted as a whole-number percentage, placed inside the segment.
    for segment in ax.patches:
        seg_width, seg_height = segment.get_width(), segment.get_height()
        left, bottom = segment.get_xy()
        anchor = (left + .25 * seg_width, bottom + .4 * seg_height)
        ax.annotate('{:.0f}%'.format(seg_height), anchor, color='white', size=8)
Correlation Matrix
# Annotated heat map of the pairwise correlations returned by df1.corr().
# df1 is defined outside this excerpt.
correlations = df1.corr()

plt.figure(figsize=(25, 15))
sns.heatmap(correlations, annot=True, cmap="coolwarm", linewidths=2)
plt.tight_layout()
plt.show()
def barplot(col_name, y_label, x_label, title):
    """Plot the percentage of rows in each category of ``df[col_name]``.

    Reads the module-level DataFrame ``df``. Bar heights are
    ``value_counts() * 100 / len(df)``, i.e. already percentages, and each
    bar is labelled with its own height rounded to one decimal place.

    Args:
        col_name: Column of ``df`` whose value counts are plotted.
        y_label: Label for the y axis.
        x_label: Label for the x axis.
        title: Chart title.
    """
    colors = ['#4D3425', '#E4512B']
    shares = df[col_name].value_counts() * 100.0 / len(df)
    ax = shares.plot(kind='bar', stacked=True, rot=0, color=colors)
    ax.yaxis.set_major_formatter(mtick.PercentFormatter())
    ax.set_ylabel(y_label)
    ax.set_xlabel(x_label)
    ax.set_title(title)

    # The label is the bar height itself. The earlier code divided the height
    # by the sum of the bar *widths*, which only equalled 1.0 (and so only
    # produced the right label) for exactly two default-width bars.
    for bar in ax.patches:
        height = bar.get_height()
        ax.text(
            bar.get_x() + .15,
            height - 3.5,
            str(round(height, 1)) + '%',
            fontsize=12,
            color='white',
            weight='bold'
        )
Classification Model
For this project, I experimented with many classification algorithms, but CatBoost gave the best results.
CatBoost Algorithm
# Fit a default-configured CatBoost classifier, predict on the test features
# and hand the resulting confusion matrix to CM.
# x_train, y_train, x_test, y_test and CM are defined outside this excerpt.
train_target = y_train.Churn
test_target = y_test.Churn

cb = CatBoostClassifier()
cb.fit(x_train, train_target)

cb_y_pred = cb.predict(x_test)
cm = confusion_matrix(test_target, cb_y_pred)
CM(cm)
Accuracy: 0.8184397163120567
Precision: 0.7218045112781954
Recall: 0.5133689839572193
F_Score: 0.6000000000000001
Classification Report
precision recall f1-score support
0 0.84 0.93 0.88 518
1 0.72 0.51 0.60 187
accuracy 0.82 705
macro avg 0.78 0.72 0.74 705
weighted avg 0.81 0.82 0.81 705
# Bar chart of the ten largest feature importances of the fitted model `cb`,
# indexed by the training-frame column names.
cb_imp = cb.feature_importances_
cb_weights = pd.Series(cb_imp, index=x_train.columns.values)

ranked_weights = cb_weights.sort_values(ascending=False)
ranked_weights[:10].plot(kind='bar')
- Link to Notebook: https://www.kaggle.com/code/sharathshebbar/telco-churn
- Link to Github: https://github.com/SharathHebbar/Customer-Analytics/tree/master/Customer%20Churn