Crash Course in Causality Examples
Published in
2 min read · Apr 27, 2023
Bayesian Networks in Iris Dataset
Importing the libraries
import pandas as pd
from pgmpy.models import BayesianModel
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination
Loading the dataset
# Read the Iris measurements from 'Iris.csv' into a DataFrame.
df = pd.read_csv(filepath_or_buffer='Iris.csv')
Defining the structure of the Bayesian network
# Network structure: each of the four measurement columns is a parent of 'Species'.
# NOTE(review): newer pgmpy releases appear to rename BayesianModel
# (e.g. to BayesianNetwork) — confirm against the installed version.
model = BayesianModel([
    (measurement, 'Species')
    for measurement in (
        'SepalLengthCm',
        'SepalWidthCm',
        'PetalLengthCm',
        'PetalWidthCm',
    )
])
Learn the parameters of the Bayesian network using maximum likelihood estimation
# Estimate the network's parameters from df with the maximum likelihood estimator.
model.fit(data=df, estimator=MaximumLikelihoodEstimator)
Create a VariableElimination object for performing inference
# Variable-elimination inference object built on the model.
inference = VariableElimination(model=model)
Perform causal inference using the Bayesian network
Calculate how the distribution of ‘Species’ changes when ‘SepalLengthCm’ is set to 5.5. The value is passed as evidence, so the query returns P(Species | SepalLengthCm = 5.5)
# Distribution over 'Species' given the evidence SepalLengthCm == 5.5.
# NOTE(review): the evidence value presumably has to be one of the values
# present in the training column — confirm against the data.
query = inference.query(variables=['Species'], evidence={'SepalLengthCm': 5.5})
# Pair every Species label with its probability; the print must be indented
# to form the loop body.
for state, prob in zip(query.state_names['Species'], query.values):
    print(f"Probability of Species {state} given SepalLengthCm=5.5: {prob}")
Estimate the probability of an iris flower being ‘Iris-setosa’ given that its ‘PetalWidthCm’ is 0.4 (the evidence fixes this exact value; it does not cover all widths below 0.4)
# Distribution over 'Species' given the evidence PetalWidthCm == 0.4.
# The evidence pins one exact value, so this is not "PetalWidthCm < 0.4";
# the printed message below states the condition that is actually used.
query = inference.query(variables=['Species'], evidence={'PetalWidthCm': 0.4})
# Locate 'Iris-setosa' by its label rather than assuming it is at index 0.
setosa_index = query.state_names['Species'].index('Iris-setosa')
prob = query.values[setosa_index]
print(f"Probability of Species Iris-setosa given PetalWidthCm = 0.4: {prob}")
Another Example
import pandas as pd
from pgmpy.models import BayesianModel
from pgmpy.estimators import BayesianEstimator
# Read the student performance records from 'StudentsPerformance.csv'.
data = pd.read_csv(filepath_or_buffer='StudentsPerformance.csv')
# Create a Bayesian network in which every background attribute is a parent
# of both 'math score' and 'reading score'.
model = BayesianModel([
    (attribute, score)
    for attribute in (
        'gender',
        'race/ethnicity',
        'parental level of education',
        'lunch',
        'test preparation course',
    )
    for score in ('math score', 'reading score')
])
# Fit the model with the Bayesian estimator using a BDeu prior
# (equivalent sample size 10); this is not plain maximum likelihood.
model.fit(data, estimator=BayesianEstimator, prior_type='BDeu', equivalent_sample_size=10)
# Display the model structure. Printed explicitly so the edges are shown
# when this runs as a script, where a bare expression's value is discarded.
print(model.edges())
# Perform inference using the model
from pgmpy.inference import VariableElimination

# Create a variable elimination object
infer = VariableElimination(model)
# Compute the conditional distribution of math score given that the
# test preparation course was completed
q = infer.query(['math score'], evidence={'test preparation course': 'completed'})
# Report the entry at index 1 together with its state label, so the printed
# number is not an anonymous value whose meaning depends on state ordering.
math_state = q.state_names['math score'][1]
print(f"Probability of math score {math_state} given test preparation course=completed: {q.values[1]}")