# Author: Michelle Bonat
# Jul 25, 2018 · 5 min read
# Setup for Outlier Detection
import numpy as np
import pandas as pd
from scipy import stats
import matplotlib.pyplot as plt
import matplotlib.font_manager

from sklearn import svm
from sklearn.covariance import EllipticEnvelope
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor


# Random state with a fixed seed (42) for reproducibility; whatever consumes
# `rng` is outside this excerpt.
rng = np.random.RandomState(42)

# 2) Load the data from the remote url via an AWS S3 bucket
# NOTE: the URL below is a placeholder -- point it at your own CSV file.
dataset_url = 'https://s3.amazonaws.com/your-url-goes-here.csv'
data = pd.read_csv(dataset_url)

# Data head with the first 5 rows
# (print is called as a function so the script runs on Python 3 as well as
# Python 2; with a single argument the output is the same on both.)
print(data.head())

# Shape of the loaded frame, followed by pandas' summary statistics.
print(data.shape)
print(data.describe())

