5 Types Regression in 45 Lines of Code


#Multiple Linear Regression


#importing the libraries

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd


#Importing the dataset


dataset = pd.read_csv('abalone.csv',header=None)
# as we don’t have column names

X = dataset.iloc[:,:-1].values

"""#We will define all the independent variables X in the


format [ row, columns] & [ upper bound: lower bound ,upper
: lower bound ] where location is [ : , : -1 ] i.e. all
columns except the last column"""

y = dataset.iloc[:, 8].values #The last 8th column

# Encoding categorical data


from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer

#Gender column
ct = ColumnTransformer([("Gender", OneHotEncoder(), [0])], remainder = 'passthrough')
X = ct.fit_transform(X)

#to avoid dummy variable trap


X = X[:, 1:]
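As a side note, newer scikit-learn releases can handle the dummy variable trap for us. Below is a minimal sketch (assuming scikit-learn 0.21 or newer; ct_alt and X_alt are illustrative names): OneHotEncoder's drop='first' option removes one dummy column automatically, so the manual X = X[:, 1:] slice is no longer needed.

#sketch: drop='first' drops one dummy column per encoded feature
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

ct_alt = ColumnTransformer([("Gender", OneHotEncoder(drop = 'first'), [0])],
                           remainder = 'passthrough')
X_alt = ct_alt.fit_transform(dataset.iloc[:,:-1].values) #same shape as X above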


# Split the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

# Fitting Multiple Linear Regression to the Training set


from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
regressor.fit(X_train, y_train)

#Predicting on the Test data set


y_pred = regressor.predict(X_test)

#To get the intercept:


print(regressor.intercept_)
#To view the coefficient values
print(regressor.coef_)
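To make the coefficients easier to read, the small sketch below pairs each one with a hand-written feature name. These names are assumptions (abalone.csv has no header row), and the first two entries are the remaining one-hot sex dummies, whose order depends on OneHotEncoder's category ordering.

feature_names = ['Sex_dummy_1', 'Sex_dummy_2', 'Length', 'Diameter', 'Height',
                 'Whole weight', 'Shucked weight', 'Viscera weight',
                 'Shell weight'] #assumed names, one per column of X
for name, coef in zip(feature_names, regressor.coef_):
    print(name, ':', round(coef, 4))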


#We can also compare the actual versus prediction


df = pd.DataFrame({'Actual': y_test.flatten(), 'Predicted': y_pred.flatten()})

df


df_1 = df.head(25)
df_1.plot(kind='bar', figsize=(16,10))
plt.grid(which='major', linestyle='-', linewidth=0.5, color='green')
plt.grid(which='minor', linestyle=':', linewidth=0.5, color='black')
plt.show()


#evaluation Metrics
from sklearn import metrics
print('Mean Absolute Error:',
metrics.mean_absolute_error(y_test, y_pred))
print('Mean Squared Error:',
metrics.mean_squared_error(y_test, y_pred))
print('Root Mean Squared Error:',
np.sqrt(metrics.mean_squared_error(y_test, y_pred)))
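We could also report R-squared as a scale-free measure of fit; a small sketch using the same y_test and y_pred:

from sklearn.metrics import r2_score
print('R2 Score:', r2_score(y_test, y_pred))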


#save the model in the disk


import pickle
# save the model to disk
filename = 'reg_model.sav'
pickle.dump(regressor, open(filename, 'wb'))

# load the model from disk


filename1 = 'reg_model.sav'
loaded_model = pickle.load(open(filename1, 'rb'))

#another method using joblib


'''Pickled model as a file using joblib: Joblib is a replacement for pickle
as it is more efficient on objects that carry large numpy arrays. '''

import joblib  #sklearn.externals.joblib is deprecated; use the standalone joblib package


# Save the model as a pickle in a file
joblib.dump(regressor, 'regressor.pkl')

# Load the model from the file


loaded_model2 = joblib.load('regressor.pkl')

# Use the loaded model to make predictions


loaded_model2.predict(X_test)
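A quick sanity check (a sketch, assuming the objects above are still in memory): the reloaded model should reproduce the in-memory model's predictions exactly.

import numpy as np
print(np.allclose(loaded_model2.predict(X_test), regressor.predict(X_test))) #expect True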


#Multiple Linear Regression

#Importing the libraries


import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

#Importing the dataset


dataset = pd.read_csv('abalone.csv',header=None)

X = dataset.iloc[:,:-1].values
#All columns except the last column (by defining the upper bound)
y = dataset.iloc[:, 8].values

#Encoding categorical data


from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer

#Gender column
ct = ColumnTransformer([("Gender", OneHotEncoder(), [0])], remainder = 'passthrough')
X = ct.fit_transform(X)

#to avoid dummy variable trap


X = X[:, 1:]

#Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y,
test_size = 0.2, random_state = 0)

#Fitting Multiple Linear Regression to the Training set


from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
regressor.fit(X_train, y_train)

#Predicting the Test set results


y_pred = regressor.predict(X_test)

#If we wish to predict by manually entering the values, we have to supply as
#many values as there are columns, each value matching its corresponding column

regressor.predict([[1.0,1.0,0.55,0.45,0.15,0.91,0.277,0.243,0.33]])
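A small guard against shape mistakes (a sketch; sample is just an illustrative name): the manually entered row must contain exactly as many values as X_train has columns.

sample = [[1.0,1.0,0.55,0.45,0.15,0.91,0.277,0.243,0.33]]
assert len(sample[0]) == X_train.shape[1] #one value per model input column
print(regressor.predict(sample))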

#To get the intercept:


print(regressor.intercept_)
#To view the coefficient values
print(regressor.coef_)

#We can also compare the actual versus predicted


df = pd.DataFrame({'Actual': y_test.flatten(),
'Predicted': y_pred.flatten()})
df

#we can also visualize the actual vs predicted


df_1 = df.head(25)
df_1.plot(kind='bar',figsize=(16,10))
plt.grid(which='major', linestyle='-', linewidth='0.5',
color='green')
plt.grid(which='minor', linestyle=':', linewidth='0.5',
color='black')
plt.show()

#evaluation Metrics
from sklearn import metrics
print('Mean Absolute Error:',
metrics.mean_absolute_error(y_test, y_pred))
print('Mean Squared Error:',
metrics.mean_squared_error(y_test, y_pred))
print('Root Mean Squared Error:',
np.sqrt(metrics.mean_squared_error(y_test, y_pred)))

#save the model in the disk


import pickle
# save the model to disk
filename = 'reg_model.sav'
pickle.dump(regressor, open(filename, 'wb'))

#load the model from disk


filename1 = 'reg_model.sav'
loaded_model = pickle.load(open(filename1, 'rb'))

#another method using joblib


'''Pickled model as a file using joblib: Joblib is a replacement for pickle
as it is more efficient on objects that carry large numpy arrays.'''

import joblib  #sklearn.externals.joblib is deprecated; use the standalone joblib package


#Save the model as a pickle in a file


joblib.dump(regressor, 'regressor.pkl')

#Load the model from the file


loaded_model2 = joblib.load('regressor.pkl')

#Use the loaded model to make predictions


loaded_model2.predict(X_test)


#Polynomial Regression

#Importing the libraries

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd


#Importing the dataset


dataset = pd.read_csv('abalone.csv', header = None)

#Encoding categorical data


from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer

#Gender column
ct = ColumnTransformer([("Gender", OneHotEncoder(), [0])], remainder = 'passthrough')
dataset = ct.fit_transform(dataset)
"""Anyway, we won't use these encoded columns; we will simply use one
independent variable X, the number of rings (the last column of the abalone
data set), and the 'Length' column as our dependent variable y.

The reason for choosing only two columns is to make it easy to compare the
performance of both algorithms using plots."""

X = dataset[:,10:]
y = dataset[:,3]

# Fitting Polynomial Regression to the dataset

from sklearn.preprocessing import PolynomialFeatures


poly_reg = PolynomialFeatures(degree = 2)

X_poly = poly_reg.fit_transform(X)
#transforms X into additional features: a constant term, X itself, and X squared

#To view the X_poly features


X_poly
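To see what those extra columns mean, here is a tiny standalone illustration with made-up numbers (a sketch): a degree-2 PolynomialFeatures maps a single feature x into the columns [1, x, x^2].

import numpy as np
from sklearn.preprocessing import PolynomialFeatures
demo = PolynomialFeatures(degree = 2).fit_transform(np.array([[2.0], [3.0]]))
print(demo) #[[1. 2. 4.]
            # [1. 3. 9.]]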
#build up the regression(poly) model


poly_reg.fit(X_poly, y)

from sklearn.linear_model import LinearRegression
linear_reg2 = LinearRegression()
linear_reg2.fit(X_poly, y)

# Fitting Linear Regression to the dataset

linear_reg = LinearRegression()
linear_reg.fit(X, y)

# Visualizing the Linear Regression results

plt.scatter(X, y, color = 'red')


plt.plot(X, linear_reg.predict(X), color = 'blue')
plt.title('Predicting the age of abalone from physical measurements.')
plt.xlabel('Rings')
plt.ylabel('Length')
plt.show()

# Visualizing the Polynomial Regression results

plt.scatter(X, y, color = 'red')


plt.plot(X, linear_reg2.predict(poly_reg.fit_transform(X)), color = 'blue')
plt.title('Predicting the age of abalone from physical measurements.')

plt.xlabel('Rings')
plt.ylabel('length')
plt.show()

# Visualizing the Polynomial Regression results (for a smoother curve)


X_grid = np.arange(min(X), max(X), 0.1)


X_grid = X_grid.reshape((len(X_grid), 1))
plt.scatter(X, y, color = 'red')
plt.plot(X_grid, linear_reg2.predict(poly_reg.fit_transform(X_grid)), color = 'blue')
plt.title('Predicting abalone from physical measurements.')
plt.xlabel('Rings')
plt.ylabel('Length')
plt.show()

# Predicting a new result with Linear Regression


linear_reg.predict([[10]])

# Predicting a new result with Polynomial Regression


linear_reg2.predict(poly_reg.fit_transform([[10]]))


#Polynomial Regression

#Importing the libraries


import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

#Importing the dataset


dataset = pd.read_csv('abalone.csv', header = None)

# Encoding categorical data


from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer

#Gender column
ct = ColumnTransformer([("Gender", OneHotEncoder(), [0])], remainder = 'passthrough')
dataset = ct.fit_transform(dataset)
"""#anyway we wont use this column we will simply use 1
independent variable i.e. column ‘Length’ of abalone data
set and X our dependent variable i.e.‘number of rings’
from the last column.

The reason why i m choosing only 2 columns is to show u


the comparison of performance of both the algorithm using
plots()"""

X = dataset[:,10:]
y = dataset[:,3]

# Fitting Linear Regression to the dataset


from sklearn.linear_model import LinearRegression
linear_reg = LinearRegression()
linear_reg.fit(X, y)

# Fitting Polynomial Regression to the dataset


from sklearn.preprocessing import PolynomialFeatures
poly_reg = PolynomialFeatures(degree = 2)

X_poly = poly_reg.fit_transform(X)

#To view the X_poly features


X_poly

#build up the regression(poly) model


poly_reg.fit(X_poly, y)
linear_reg2 = LinearRegression()
linear_reg2.fit(X_poly, y)

# Visualizing the Linear Regression results


plt.scatter(X, y, color = 'red')
plt.plot(X, linear_reg.predict(X), color = 'blue')
plt.title('Predicting abalone from physical measurements.')
plt.xlabel('Rings')
plt.ylabel('Length')
plt.show()

# Visualizing the Polynomial Regression results


plt.scatter(X, y, color = 'red')
plt.plot(X,
linear_reg2.predict(poly_reg.fit_transform(X)), color =
'blue')
plt.title('Predicting abalone from physical measurements.')
plt.xlabel('Rings')
plt.ylabel('length')
plt.show()

# Visualizing the Polynomial Regression results (for a smoother curve)

X_grid = np.arange(min(X), max(X), 0.1)
X_grid = X_grid.reshape((len(X_grid), 1))
plt.scatter(X, y, color = 'red')
plt.plot(X_grid,
linear_reg2.predict(poly_reg.fit_transform(X_grid)), color
= 'blue')
plt.title('Predicting abalone from physical measurements.')
plt.xlabel('Rings')
plt.ylabel('Length')
plt.show()

# Predicting a new result with Linear Regression


#linear_reg.predict([length])
linear_reg.predict([[10]])

# Predicting a new result with Polynomial Regression


linear_reg2.predict(poly_reg.fit_transform([[10]]))


#Support Vector Regression (SVR)

# Importing the libraries


import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Importing the dataset


data = pd.read_csv('AirPressure.csv')
data

#dividing the dataset into X and y


X = data.iloc[:, 1:2].values
y = data.iloc[:, 2].values

# Feature Scaling for SVR


from sklearn.preprocessing import StandardScaler


sc_X = StandardScaler()
sc_y = StandardScaler()

X = sc_X.fit_transform(X.reshape(-1,1)) #use sc_X for the features, sc_y for the target
y = sc_y.fit_transform(y.reshape(-1,1))

# Fitting Linear Regression to the dataset


from sklearn.linear_model import LinearRegression
lin = LinearRegression()
lin.fit(X, y)

# Fitting SVR to the dataset


from sklearn.svm import SVR
regressor = SVR(kernel = 'rbf')
regressor.fit(X, y)


# Visualising the Linear Regression results


plt.scatter(X, y, color = 'blue')
plt.plot(X, lin.predict(X), color = 'red')
plt.title('Linear Regression')
plt.xlabel('Temperature')
plt.ylabel('Pressure')
plt.show()

# Visualising the SVR results


plt.scatter(X, y, color = 'blue')
plt.plot(X, regressor.predict(X), color = 'red')
plt.title('Support Vector Regression')
plt.xlabel('Temperature')
plt.ylabel('Pressure')
plt.show()

#Predicting a new result with Linear Regression


lin.predict([[150.0]])

#Predicting a new result (pressure) with Support Vector Regression

y_Pressure = regressor.predict([[55]])
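Keep in mind that both X and y were standardized, so the 55 above is read on the scaled axis. Below is a sketch of predicting from a raw temperature and converting the result back to real pressure units (it assumes sc_X and sc_y were fitted as shown earlier; 55.0 is just an example value):

temp_scaled = sc_X.transform([[55.0]])       #scale the raw temperature
pred_scaled = regressor.predict(temp_scaled) #prediction in scaled units
pred_pressure = sc_y.inverse_transform(pred_scaled.reshape(-1, 1))
print(pred_pressure)                         #prediction back in original pressure units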


# Importing the libraries


import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Importing the dataset


data = pd.read_csv('AirPressure.csv')
data

#dividing the dataset into X and y


X = data.iloc[:, 1:2].values
y = data.iloc[:, 2].values

#Feature Scaling for SVR


from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
sc_y = StandardScaler()

X = sc_X.fit_transform(X.reshape(-1,1)) #use sc_X for the features, sc_y for the target
y = sc_y.fit_transform(y.reshape(-1,1))

#Fitting Linear Regression to the dataset


from sklearn.linear_model import LinearRegression
lin = LinearRegression()

lin.fit(X, y)

#Fitting SVR to the dataset


from sklearn.svm import SVR
regressor = SVR(kernel = 'rbf')
regressor.fit(X, y)

#Visualizing the Linear Regression results


plt.scatter(X, y, color = 'blue')
plt.plot(X, lin.predict(X), color = 'red')
plt.title('Linear Regression')
plt.xlabel('Temperature')
plt.ylabel('Pressure')


plt.show()

# Visualizing the SVR results


plt.scatter(X, y, color = 'blue')
plt.plot(X, regressor.predict(X), color = 'red')
plt.title('Support Vector Regression')
plt.xlabel('Temperature')
plt.ylabel('Pressure')
plt.show()

#Predicting a new result with Linear Regression


lin.predict([[150.0]])

#Predicting a new result (pressure) with Support Vector Regression

y_Pressure = regressor.predict([[55]])


#Decision Tree Regression

# Importing the libraries


import numpy as np
import matplotlib.pyplot as plt
import pandas as pd


# Importing the dataset


data = pd.read_csv('AirPressure.csv')
data

#dividing the dataset into X and y


X = data.iloc[:, 1:2].values
y = data.iloc[:, 2].values

# Feature Scaling for SVR


from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
sc_y = StandardScaler()

X = sc_X.fit_transform(X.reshape(-1,1)) #use sc_X for the features, sc_y for the target
y = sc_y.fit_transform(y.reshape(-1,1))

#Fitting Linear Regression to the dataset


from sklearn.linear_model import LinearRegression
lin = LinearRegression()
lin.fit(X, y)

#Fitting SVR to the dataset


from sklearn.svm import SVR
regressor = SVR(kernel = 'rbf')
regressor.fit(X, y)

#Fitting Decision Tree Regression


from sklearn.tree import DecisionTreeRegressor
dt_model = DecisionTreeRegressor(random_state = 0)
dt_model.fit(X, y)

#Visualizing the Linear Regression results


plt.scatter(X, y, color = 'blue')
plt.plot(X, lin.predict(X), color = 'red')
plt.title('Linear Regression')
plt.xlabel('Temperature')
plt.ylabel('Pressure')
plt.show()

#Visualizing the SVR results


plt.scatter(X, y, color = 'blue')


plt.plot(X, regressor.predict(X), color = 'red')


plt.title('Support Vector Regression')
plt.xlabel('Temperature')
plt.ylabel('Pressure')
plt.show()

#Visualizing the Decision Trees Regression results


plt.scatter(X, y, color = 'blue')
plt.plot(X, dt_model.predict(X), color = 'red')
plt.title('Decision Trees Regression')
plt.xlabel('Temperature')
plt.ylabel('Pressure')
plt.show()


#Predicting a new result with Linear Regression


lin.predict([[55.0]])

#Predicting a new result (pressure) with Decision Tree Regression

y_Pressure = dt_model.predict([[55]])

# import export_graphviz
from sklearn.tree import export_graphviz

# export the decision tree to a tree.dot file
# for visualizing the plot easily anywhere
# (the single input feature here is the scaled temperature)
export_graphviz(dt_model, out_file ='e:/tree.dot', feature_names =['Temperature'])
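If we don't want to depend on Graphviz, here is a sketch of an alternative using scikit-learn's own plot_tree (available from scikit-learn 0.21 onwards); 'Temperature' labels our single (scaled) input feature, and max_depth=3 only limits how much of the tree is drawn.

from sklearn.tree import plot_tree
import matplotlib.pyplot as plt

plt.figure(figsize = (20, 10))
plot_tree(dt_model, feature_names = ['Temperature'], filled = True, max_depth = 3)
plt.show()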


# Importing the libraries


import numpy as np
import matplotlib.pyplot as plt
import pandas as pd


# Importing the dataset


data = pd.read_csv('AirPressure.csv')
data

#dividing the dataset into X and y


X = data.iloc[:, 1:2].values
y = data.iloc[:, 2].values

#Feature Scaling for SVR


from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
sc_y = StandardScaler()

X = sc_X.fit_transform(X.reshape(-1,1)) #use sc_X for the features, sc_y for the target
y = sc_y.fit_transform(y.reshape(-1,1))

#Fitting Linear Regression to the dataset


from sklearn.linear_model import LinearRegression
lin = LinearRegression()

lin.fit(X, y)

#Fitting SVR to the dataset

from sklearn.svm import SVR
regressor = SVR(kernel = 'rbf')
regressor.fit(X, y)

#Fitting Decision Tree Regression (dt_model is used for plotting and prediction below)

from sklearn.tree import DecisionTreeRegressor
dt_model = DecisionTreeRegressor(random_state = 0)
dt_model.fit(X, y)

#Visualising the Linear Regression results


plt.scatter(X, y, color = 'blue')
plt.plot(X, lin.predict(X), color = 'red')
plt.title('Linear Regression')
plt.xlabel('Temperature')
plt.ylabel('Pressure')
plt.show()

#Visualising the SVR results


plt.scatter(X, y, color = 'blue')
plt.plot(X, regressor.predict(X), color = 'red')
plt.title('Support Vector Regression')
plt.xlabel('Temperature')
plt.ylabel('Pressure')
plt.show()

#Visualising the Decision Trees Regression results


plt.scatter(X, y, color = 'blue')
plt.plot(X, dt_model.predict(X), color = 'red')
plt.title('Decision Trees Regression')
plt.xlabel('Temperature')


plt.ylabel('Pressure')
plt.show()

#we will see the line passing through the blue points (thus a better model)

#Predicting a new result with Linear Regression


lin.predict([[55.0]])

#Predicting a new result (pressure) with Decision Tree Regression

y_Pressure = dt_model.predict([[55]])

from sklearn.tree import export_graphviz


#export the decision tree to a tree.dot file
#for visualizing the plot easily anywhere
export_graphviz(dt_model, out_file ='e:/tree.dot', feature_names =['Temperature'])

"""
The tree is finally exported and we can visualized using
http://www.webgraphviz.com/ by copying the data from the
‘tree.dot’ file."""

import pickle
#save the model to disk
filename = 'final_model.sav'
pickle.dump(dt_model, open(filename, 'wb'))

#load the model from disk


filename1 = 'final_model.sav'
loaded_model = pickle.load(open(filename1, 'rb'))

#another method using joblib


'''Pickled model as a file using joblib: Joblib is a replacement for pickle
as it is more efficient on objects that carry large numpy arrays.
'''

import joblib  #sklearn.externals.joblib is deprecated; use the standalone joblib package


#Save the model as a pickle in a file
joblib.dump(dt_model, 'dt_model.pkl')

#Load the model from the file


loaded_model2 = joblib.load('dt_model.pkl')

#Use the loaded model to make predictions


loaded_model2.predict([[55]])


#Random Forest Regression

# Importing the libraries


import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Importing the dataset


data = pd.read_csv('AirPressure.csv')
data

#dividing the dataset into X and y


X = data.iloc[:, 1:2].values
y = data.iloc[:, 2].values

# Feature Scaling for SVR


from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
sc_y = StandardScaler()

X = sc_X.fit_transform(X.reshape(-1,1)) #use sc_X for the features, sc_y for the target
y = sc_y.fit_transform(y.reshape(-1,1))

#Fitting Decision Tree Regression


from sklearn.tree import DecisionTreeRegressor


dt_model = DecisionTreeRegressor(random_state = 0)
dt_model.fit(X, y)

#Fitting Random Forest Regression to the dataset


from sklearn.ensemble import RandomForestRegressor
rf_model = RandomForestRegressor(n_estimators = 500,
random_state = 0)
rf_model.fit(X, y)
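Before looking at the plots, a quick numeric comparison of the two models (a sketch using 5-fold cross-validation on the same scaled data; scikit-learn reports MSE as a negative score, hence the minus sign):

from sklearn.model_selection import cross_val_score

for name, model in [('Decision Tree', dt_model), ('Random Forest', rf_model)]:
    scores = cross_val_score(model, X, y.ravel(), cv = 5,
                             scoring = 'neg_mean_squared_error')
    print(name, 'mean MSE:', -scores.mean())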

# Visualizing the Decision Trees Regression results


plt.scatter(X, y, color = 'blue')
plt.plot(X, dt_model.predict(X), color = 'red')
plt.title('Decision Trees Regression')
plt.xlabel('Temperature')
plt.ylabel('Pressure')
plt.show()

# Visualizing the Random Forest results


plt.scatter(X, y, color = 'blue')
plt.plot(X, rf_model.predict(X), color = 'red')
plt.title('Random Forest Regression')
plt.xlabel('Temperature')
plt.ylabel('Pressure')
plt.show()

# Visualizing the Random Forest results with higher resolution
X_grid = np.arange(min(X), max(X), 0.01)
X_grid = X_grid.reshape((len(X_grid), 1))
plt.scatter(X, y, color = 'red')
plt.plot(X_grid, rf_model.predict(X_grid), color = 'blue')
plt.title('Random Forest Regression')
plt.xlabel('Temperature')
plt.ylabel('Pressure')
plt.show()

#Predicting a new result (pressure) with Random Forest Regression

rf_model.predict([[55]])

#Predicting a new result (pressure) with Decision Tree Regression

dt_model.predict([[55]])


#Importing the libraries


import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

#Importing the dataset


data = pd.read_csv('AirPressure.csv')
data

#dividing the dataset into X and y


X = data.iloc[:, 1:2].values
y = data.iloc[:, 2].values

#Feature Scaling for SVR


from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
sc_y = StandardScaler()

X = sc_X.fit_transform(X.reshape(-1,1)) #use sc_X for the features, sc_y for the target
y = sc_y.fit_transform(y.reshape(-1,1))

#Fitting Decision Tree Regression


from sklearn.tree import DecisionTreeRegressor
dt_model = DecisionTreeRegressor(random_state = 0)
dt_model.fit(X, y)

#Fitting Random Forest Regression to the dataset


from sklearn.ensemble import RandomForestRegressor
rf_model = RandomForestRegressor(n_estimators = 500,
random_state = 0)
rf_model.fit(X, y)

# Visualizing the Decision Trees Regression results


plt.scatter(X, y, color = 'blue')
plt.plot(X, dt_model.predict(X), color = 'red')
plt.title('Decision Trees Regression')
plt.xlabel('Temperature')
plt.ylabel('Pressure')
plt.show()

# Visualizing the Random Forest results


plt.scatter(X, y, color = 'blue')
plt.plot(X, rf_model.predict(X), color = 'red')
plt.title('Random Forest Regression')
plt.xlabel('Temperature')


plt.ylabel('Pressure')
plt.show()

# Visualizing the Random Forest results in high resolution


X_grid = np.arange(min(X), max(X), 0.01)
X_grid = X_grid.reshape((len(X_grid), 1))
plt.scatter(X, y, color = 'red')
plt.plot(X_grid, rf_model.predict(X_grid), color = 'blue')
plt.title('Random Forest Regression')
plt.xlabel('Temperature')
plt.ylabel('Pressure')
plt.show()

#Predicting a new result (pressure) with Random Forest Regression

rf_model.predict([[55]])

#Predicting a new result (pressure) with Decision Tree Regression

dt_model.predict([[55]])
