Pytorch Regression model using Azure Machine learning

Balamurugan Balakreshnan
Analytics Vidhya
Published in
4 min readApr 1, 2021

Run Regression using pytorch with Azure ML

This article is intended to demonstrate the process; the model it builds is only a fake (illustrative) one.

Prerequisites

  • Azure Account
  • Resource group
  • Azure Machine learning
  • Azure Storage blob
  • Download the Nasa Predictive Maintenance data set
  • Go to https://ti.arc.nasa.gov/tech/dash/groups/pcoe/prognostic-data-repository/
  • Scroll down to "Turbofan Engine Degradation Simulation Data Set" and click the link "Download Turbofan Engine Degradation Simulation Data Set (68202 downloads)"
  • Create a dataset in Azure ML called nasaPredMaint

Azure Blob Storage

  • Create a Storage account
  • Create a container called predmaint (Azure Blob container names must be all lowercase)
  • Upload the files that were downloaded

Azure Machine Learning

  • First create a data set
  • Create a new data store called: nasapredmaint
  • Now let's create a Compute instance
  • Start the compute instance
  • Create a new notebook
  • Select python + AzureML
  • Load the registered data set
# Connect to an Azure ML workspace and fetch the registered dataset.
# azureml-core of version 1.0.72 or higher is required
# azureml-dataprep[pandas] of version 1.1.34 or higher is required
from azureml.core import Workspace, Dataset

# Placeholder identifiers: replace with the values of your own subscription,
# resource group and workspace before running.
subscription_id = 'xxxxxxxxxxx'
resource_group = 'rgname'
workspace_name = 'mlworkspace'

workspace = Workspace(subscription_id, resource_group, workspace_name)

# Look the dataset up by the name it was registered under.
dataset = Dataset.get_by_name(workspace, name='nasaPredMaint')
# The result is not assigned here; as the last expression of a notebook cell
# it is only displayed. The DataFrame is bound to a name in the next step.
dataset.to_pandas_dataframe()
  • convert data set to dataframe
# Materialise the registered dataset as a pandas DataFrame for the steps below.
df = dataset.to_pandas_dataframe()
  • List all the columns
# Bare expression: shown as cell output in a notebook, no effect in a script.
df.columns
  • import libraries
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
import numpy as np
import torch
from torch.autograd import Variable
  • Drop the label column and the unwanted columns
# df1 holds the candidate feature columns: 'timecycles' is removed because it
# is used as the label below; 'sensor22' and 'sensor23' are dropped as unwanted.
df1 = df.drop(columns=['timecycles','sensor22', 'sensor23'])
  • Split features and labels
# First 31 columns (at most) of df1 as features.
# NOTE(review): X and y are assigned again before the train/test split, there
# with df1.iloc[:, 0:29]; confirm which column range is the intended one.
X = df1.iloc[:,:31]
# Double brackets keep y as a one-column DataFrame rather than a Series.
y = df[['timecycles']]
  • display dataframe
# Bare expression: previews the first rows of df1 as notebook cell output.
df1.head()
  • now import new libraries
#Let's get rid of some imports
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
#Define the model
import torch
import torch.nn as nn
import torch.nn.functional as F
  • defines features and label
from sklearn.model_selection  import train_test_split
# Features: the first 29 columns of df1 (this overrides the earlier X).
X = df1.iloc[:, 0:29]
# Label: 'timecycles', kept as a one-column DataFrame (2-D once converted
# with .values).
y = df[['timecycles']]

# Split the data into a training set and a test set
# 30% of the rows go to the test set; random_state=0 fixes the split seed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)
  • Define parameters
#Define training hyperparameters.
batch_size = 50       # samples per mini-batch
num_epochs = 200      # full passes over the training set
learning_rate = 0.01  # step size handed to the optimizer
size_hidden= 100      # units in the single hidden layer

#Calculate some other hyperparameters based on data.
# Floor division: when len(X_train) is not a multiple of batch_size, the
# leftover samples at the end are not visited by the batch loop in an epoch.
batch_no = len(X_train) // batch_size #batches
cols=X_train.shape[1] #Number of columns in input matrix
n_output=1 # a single regression target
  • Create a model
#Create the model
# Select the first CUDA GPU when PyTorch reports one, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Assume that we are on a CUDA machine, then this should print a CUDA device:
# NOTE(review): `device` is only printed; nothing in the code shown moves the
# model or the tensors onto it, so as written everything runs on the CPU.
print("Executing the model on :",device)
class Net(torch.nn.Module):
    """Feed-forward regression network with one ReLU hidden layer.

    Args:
        n_feature: Number of input features per sample.
        size_hidden: Number of units in the hidden layer.
        n_output: Number of values predicted per sample.
    """

    def __init__(self, n_feature, size_hidden, n_output):
        super().__init__()
        # Size the input layer from the constructor argument instead of the
        # notebook-level `cols` global, so the class works for any width.
        self.hidden = torch.nn.Linear(n_feature, size_hidden)  # hidden layer
        self.predict = torch.nn.Linear(size_hidden, n_output)  # output layer

    def forward(self, x):
        """Map a (batch, n_feature) tensor to (batch, n_output) predictions."""
        x = F.relu(self.hidden(x))  # activation function for hidden layer
        return self.predict(x)  # linear output, no activation for regression
net = Net(cols, size_hidden, n_output)
  • configure the optimizer
#Adam is a specific flavor of gradient descent which is typically better
optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)
#optimizer = torch.optim.SGD(net.parameters(), lr=0.2)
# Summed (not averaged) squared error for regression. `reduction='sum'` is the
# supported spelling of the deprecated `size_average=False` and gives the same
# value.
criterion = torch.nn.MSELoss(reduction='sum')
  • change to values
#Change to numpy array.
# `.values` exposes the underlying ndarray of each DataFrame; the training
# loop slices these arrays and builds tensors from them.
X_train=X_train.values
y_train=y_train.values
X_test=X_test.values
y_test=y_test.values
  • now run the model
from sklearn.utils import shuffle
from torch.autograd import Variable

# Mini-batch training: every epoch reshuffles the training set, visits
# `batch_no` consecutive slices of `batch_size` rows, and prints the loss
# accumulated over those batches.
running_loss = 0.0
for epoch in range(num_epochs):
    # Shuffle features and targets together so the rows stay aligned while
    # the batch composition changes between epochs.
    X_train, y_train = shuffle(X_train, y_train)
    # Mini batch learning
    for i in range(batch_no):
        start = i * batch_size
        end = start + batch_size
        # Plain tensors are enough; the Variable wrapper is deprecated.
        inputs = torch.FloatTensor(X_train[start:end])
        labels = torch.FloatTensor(y_train[start:end])

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        # Give the targets exactly the shape of the predictions. y_train is
        # already 2-D (N, 1) here, so an extra unsqueeze would produce
        # (B, 1, 1) targets that broadcast against the (B, 1) outputs and
        # yield a wrong loss. reshape also accepts 1-D targets.
        loss = criterion(outputs, labels.reshape(outputs.shape))
        loss.backward()
        optimizer.step()

        # accumulate the summed loss of this batch
        running_loss += loss.item()

    print('Epoch {}'.format(epoch+1), "loss: ",running_loss)
    running_loss = 0.0
  • Calculate the metrics
import pandas as pd
from sklearn.metrics import r2_score

# Score the trained network on the training data.
X = torch.FloatTensor(X_train)
# Inference only: skip building the autograd graph.
with torch.no_grad():
    result = net(X)
# Column 0 holds the single regression output; convert it to a 1-D ndarray.
pred = result[:, 0].numpy()
print(len(pred),len(y_train))
# r2_score expects (y_true, y_pred). R^2 is not symmetric, so passing the
# predictions first reports a different, incorrect value.
r2_score(y_train, pred)
  • Find R2 score
import pandas as pd
from sklearn.metrics import r2_score
#This is a little bit tricky to get the resulting prediction.
def calculate_r2(x, y=None):
    """Predict with the module-level ``net`` and optionally score the result.

    Args:
        x: Array-like of shape (n_samples, n_features) holding the inputs.
        y: Optional true targets, either 1-D or a single column. When omitted
            (or empty) only the predictions are returned.

    Returns:
        A 1-D numpy array of predictions when ``y`` is not given; otherwise a
        DataFrame with the columns ``actual`` and ``predicted``. In the latter
        case the R-squared value is also printed.
    """
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        result = net(torch.FloatTensor(x))  # regression output, one column
    result = result[:, 0].numpy()

    # `None` replaces the former mutable default `[]`; an explicitly passed
    # empty sequence is still treated as "no targets".
    if y is None or len(y) == 0:
        print("returning predictions")
        return result

    # Flatten so a (n, 1) column works both for the score and as a DataFrame
    # column, which must be 1-D.
    actual = np.asarray(y).ravel()
    # r2_score expects the true values first, then the predictions.
    r2 = r2_score(actual, result)
    print("R-Squared", r2)
    return pd.DataFrame(data={'actual': actual, 'predicted': result})
  • Run linear model
# Fit an ordinary least-squares linear regression on the same training arrays
# to compare against the neural network.
from sklearn.linear_model import LinearRegression
lm = LinearRegression()
lm.fit( X_train, y_train )
  • Print score
# LinearRegression.score returns R^2. The second figure comes from the single
# held-out test split created earlier, not from k-fold cross-validation.
# The stray ')' in the first label has been removed.
print('R2 for Train', lm.score( X_train, y_train ))
print('R2 for Test (cross validation)', lm.score(X_test, y_test))

Samples2021/pytorchregression.md at main · balakreshnan/Samples2021 (github.com)

--

--