Weather Prediction Module

Nirajan Acharya
3 min read · Jul 26, 2023

--

Import necessary libraries

import matplotlib.pyplot as plt
import seaborn as sns
import scipy
import re
import missingno as mso
from scipy import stats
from scipy.stats import ttest_ind
from scipy.stats import pearsonr
from sklearn.preprocessing import StandardScaler,LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import GradientBoostingClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report
import pandas as pd

Read CSV file

# Load the Seattle weather observations into a DataFrame, then preview the
# first rows (the bare expression renders as a table in a notebook cell).
csv_path = "/content/seattle-weather.csv"
data = pd.read_csv(csv_path)
data.head()

Shape of Data

# Dimensions of the loaded frame as a (rows, columns) tuple.
data.shape

(1461, 6)

import warnings

# Silence library warnings so the notebook output stays readable.
warnings.filterwarnings('ignore')

# Bar chart of the number of days per weather class.
# `x` is passed by keyword: recent seaborn releases no longer accept the
# data vector as a positional argument.
sns.countplot(x="weather", data=data, palette='hls')

# Number of days recorded for each weather class.
countrain = len(data[data.weather == 'rain'])
countsun = len(data[data.weather == 'sun'])
countdrizzle = len(data[data.weather == 'drizzle'])
countsnow = len(data[data.weather == 'snow'])
countfog = len(data[data.weather == 'fog'])

# Share of each class in percent. The format spec is `.2f` (two decimals);
# the previous `2f` only set a minimum field width and printed six decimals.
total_days = len(data.weather)
print('percent of rain:{:.2f}%'.format(countrain / total_days * 100))
print('percent of sun:{:.2f}%'.format(countsun / total_days * 100))
print('percent of drizzle:{:.2f}%'.format(countdrizzle / total_days * 100))
print('percent of snow:{:.2f}%'.format(countsnow / total_days * 100))
print('percent of fog:{:.2f}%'.format(countfog / total_days * 100))

# Summary statistics of the four numeric features.
data[['precipitation', 'temp_max', 'temp_min', 'wind']].describe()
sns.set(style='darkgrid')

# Feature -> colour, in the order the panels are filled (row by row).
feature_colors = {
    'precipitation': 'green',
    'temp_max': 'red',
    'temp_min': 'blue',
    'wind': 'orange',
}

# 2x2 grid of histograms with a KDE overlay, one panel per feature.
fig, axs = plt.subplots(2, 2, figsize=(10, 8))
for ax, (column, color) in zip(axs.flat, feature_colors.items()):
    sns.histplot(data=data, x=column, kde=True, ax=ax, color=color)

# 2x2 grid of violin plots for the same features.
# `kde=True` was dropped here: it is a histplot option, not a violinplot
# one (a violin already is a density estimate), and newer seaborn versions
# forward unknown keywords to matplotlib, where they are rejected.
fig, axs = plt.subplots(2, 2, figsize=(10, 8))
for ax, (column, color) in zip(axs.flat, feature_colors.items()):
    sns.violinplot(data=data, x=column, ax=ax, color=color)
# One box plot per numeric feature, grouped by weather class.
# x/y are passed by keyword: recent seaborn releases no longer accept the
# data vectors as positional arguments.
boxplot_specs = [
    ('precipitation', 'YlOrBr'),
    ('temp_max', 'inferno'),
    ('wind', 'YlOrBr'),
    ('temp_min', 'YlOrBr'),
]
for feature, palette in boxplot_specs:
    plt.figure(figsize=(12, 6))
    sns.boxplot(x=feature, y='weather', data=data, palette=palette)

# Correlation heatmap. Only numeric columns are correlated: calling
# `corr()` on a frame that still holds string columns raises on pandas 2.x,
# whereas older versions silently dropped them.
plt.figure(figsize=(12, 6))
numeric_data = data.select_dtypes(include='number')
sns.heatmap(numeric_data.corr(), annot=True, cmap='coolwarm')
# Scatter of precipitation against maximum temperature, followed by the
# correlation between the two columns and an independent two-sample t-test.
data.plot(x="precipitation", y='temp_max', style='o')

precip_temp_corr = data['precipitation'].corr(data['temp_max'])
print('pearsons correlation: ', precip_temp_corr)

precip_temp_ttest = stats.ttest_ind(data['precipitation'], data['temp_max'])
print('T test and P value: ', precip_temp_ttest)

Pearsons correlation: -0.22855481643297046
T test and P value: Ttest_indResult(statistic=-51.60685279531918, pvalue=0.0)

# Scatter of wind against maximum temperature, followed by the correlation
# between the two columns and an independent two-sample t-test.
data.plot(x="wind", y='temp_max', style='o')

wind_temp_corr = data['wind'].corr(data['temp_max'])
print('pearsons correlation: ', wind_temp_corr)

wind_temp_ttest = stats.ttest_ind(data['wind'], data['temp_max'])
print('T test and P value: ', wind_temp_ttest)

Pearsons correlation: -0.16485663487495486
T test and P value: Ttest_indResult(statistic=-67.3601643301846, pvalue=0.0)

# Scatter of maximum against minimum temperature.
data.plot(x='temp_max', y='temp_min', style='o')

# Count of missing values per column (displayed by the notebook).
data.isna().sum()

# Completeness bar chart for every column except `date`, drawn in the
# right-hand slot of a 1x2 subplot layout.
plt.figure(figsize=(12, 6))
axz = plt.subplot(1, 2, 2)
mso.bar(data.drop(columns=['date']), ax=axz, fontsize=12)

# `date` is removed from the working frame from here on.
data = data.drop(columns=['date'])

import numpy as np

# IQR-based outlier removal on the numeric columns only. The string
# `weather` column must be excluded explicitly: `DataFrame.quantile` raises
# on non-numeric columns in pandas 2.x (older versions dropped them silently).
numeric_cols = data.select_dtypes(include='number').columns
Q1 = data[numeric_cols].quantile(0.25)
Q3 = data[numeric_cols].quantile(0.75)
IQR = Q3 - Q1
lower_fence = Q1 - 1.5 * IQR
upper_fence = Q3 + 1.5 * IQR

# A row is discarded when any numeric feature falls outside its fences.
is_outlier = (
    (data[numeric_cols] < lower_fence) | (data[numeric_cols] > upper_fence)
).any(axis=1)
# `.copy()` detaches the filtered frame so the column assignments below
# write to it directly instead of to a view of the unfiltered data.
data = data[~is_outlier].copy()

# Square-root transform of the two right-skewed features.
data['precipitation'] = np.sqrt(data['precipitation'])
data['wind'] = np.sqrt(data['wind'])

# Re-plot the feature distributions after cleaning and transformation.
sns.set(style='darkgrid')
fig, axs = plt.subplots(2, 2, figsize=(10, 8))
sns.histplot(data=data, x="precipitation", kde=True, ax=axs[0, 0], color='green')
sns.histplot(data=data, x="temp_max", kde=True, ax=axs[0, 1], color='red')
sns.histplot(data=data, x="temp_min", kde=True, ax=axs[1, 0], color='blue')
sns.histplot(data=data, x="wind", kde=True, ax=axs[1, 1], color='orange')
data.head()
# Encode the weather class names as integer labels; `lc` keeps the mapping
# so predictions can be translated back with `lc.inverse_transform`.
lc = LabelEncoder()
data['weather'] = lc.fit_transform(data['weather'])
data.head()

# Feature matrix: every column except the target, kept as floats.
# The features were previously cast to int, which truncated the
# square-root-transformed values and did not match the float-valued sample
# that is later passed to `predict`.
x = data.drop(columns=['weather']).to_numpy(dtype=float)
y = data['weather'].to_numpy()
data.weather.unique()

# Hold out 10% of the rows for evaluation; fixed seed for reproducibility.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.1, random_state=2
)
# Fit four classifiers on the training split and report each one's accuracy
# on the held-out split, as a percentage.

# k-nearest neighbours, default hyper-parameters.
knn = KNeighborsClassifier()
knn.fit(x_train, y_train)
knn_accuracy = knn.score(x_test, y_test) * 100
print('KNN accuracy:{:.2f}%'.format(knn_accuracy))

# Support vector classifier, default hyper-parameters.
svm = SVC()
svm.fit(x_train, y_train)
svm_accuracy = svm.score(x_test, y_test) * 100
print('SVM accuracy:{:.2f}%'.format(svm_accuracy))

# Gradient boosting with row subsampling.
gbc = GradientBoostingClassifier(
    subsample=0.5,
    n_estimators=450,
    max_depth=5,
    max_leaf_nodes=25,
)
gbc.fit(x_train, y_train)
gbc_accuracy = gbc.score(x_test, y_test) * 100
print('GBC accuracy:{:.2f}%'.format(gbc_accuracy))

# XGBoost, default hyper-parameters; warnings are silenced beforehand.
import warnings
warnings.filterwarnings('ignore')
xgb = XGBClassifier()
xgb.fit(x_train, y_train)
xgb_accuracy = xgb.score(x_test, y_test) * 100
print('XGB accuracy:{:.2f}%'.format(xgb_accuracy))
# Single observation to classify, as a 2-D array (one row, four features).
# NOTE(review): presumably ordered as the training columns (precipitation,
# temp_max, temp_min, wind) with precipitation and wind already
# square-root transformed -- confirm against the training frame.
# Named `sample` rather than `input` to avoid shadowing the builtin.
sample = np.array([[1.140175, 8.9, 2.8, 2.469818]])
ot = xgb.predict(sample)

print('the weather is:')
# Translate the predicted integer back to its class name with the fitted
# encoder. A hard-coded index -> name table is fragile: the encoder is fit
# after outlier removal, so if a class disappears from the data the indices
# shift and the table would print the wrong label.
print(lc.inverse_transform(ot)[0])
import pickle

# Persist the trained XGBoost classifier to disk.
# The context manager guarantees the file is flushed and closed even if
# serialization fails; the previous bare `open(...)` leaked the handle.
# NOTE: only unpickle this file from a trusted location -- loading a pickle
# executes arbitrary code.
file = 'model.pkl'
with open(file, 'wb') as model_file:
    pickle.dump(xgb, model_file)

--

--