In [39]:
import sklearn
import numpy as np
import pandas as pd
#!python --version

#### Load data from file and preprocess data

In [None]:
# Read the tab-separated text file into a DataFrame.
df = pd.read_csv('data1.txt', sep='\t')
# Note for later in the lab: the alternative call below adds header=None,
# which reads the file without treating its first line as a header.
# df = pd.read_csv(r'data1.txt', sep ="\t", header = None)
# Show the frame, then its (rows, cols) shape on the next line.
print(df, df.shape, sep="\n")

In [None]:
# Convert the DataFrame into a NumPy array so that later cells can slice
# its columns by position.
data = df.to_numpy()
data  # bare last expression: the notebook displays the resulting array

In [None]:
# Report the dimensions of the data array (examples x columns).
rows, cols = data.shape
print("Rows: ", rows)
print("Cols: ", cols)

# Separate the target (class label) from the input features with slicing.
# The label is the LAST column, so it is indexed from the end rather than
# with a hard-coded column number: the feature slice and the label then
# always stay disjoint, whatever the number of columns in the file.
target = data[:, -1]  # class labels for data
print("Target:", target)
# NOTE: `input` shadows Python's builtin input(); the name is kept because
# later cells refer to it.
input = data[:, :-1]  # input data: every column except the label
print("Input: \n", input)


#### Create an 80/20 train/test split

In [None]:
from sklearn import linear_model
from sklearn import model_selection

# Hold out 20% of the examples for testing; the fixed random_state keeps
# the split the same from run to run.
(X_train, X_test,
 y_train, y_test) = model_selection.train_test_split(
    input,
    target,
    test_size=0.20,
    random_state=42,
)

# Show each of the four pieces produced by the split.
print("X_train:", X_train)
print("X_test:", X_test)
print("y_train:", y_train)
print("y_test:", y_test)

#### Create, train, and test perceptron

In [None]:
# Build the perceptron classifier; random_state is fixed so that repeated
# runs of this cell give the same model.
clf = linear_model.Perceptron(tol = 1e-3, random_state = 0) # creates a perceptron
# Fit on the training split only; the test split is kept for evaluation.
clf.fit(X_train, y_train) # train the perceptron


In [None]:
# Print a caption, then let the notebook display the score as the cell output.
print("Accuracy in training: ")
clf.score(X_train, y_train) # accuracy of the fitted model on the training split

In [None]:
# Print a caption, then let the notebook display the score as the cell output.
print("Accuracy in testing: ")
clf.score(X_test, y_test) # accuracy of the fitted model on the held-out test split

#### Print predicted label and true label (target)

In [None]:
# Perceptron predicts class label for train data
predict_train = clf.predict(X_train)
print("Predicted and true label of train data")
[predict_train, y_train]

In [None]:
# Display the parameters of the perceptron estimator (the values passed to
# its constructor plus the library defaults for everything else).
clf.get_params()

#### Read the perceptron weights and plot the line they define together with the 2D data

In [None]:
# Pull the learned parameters out of the fitted perceptron and pack them
# into one flat vector laid out as [bias, weight_1, weight_2].
w0 = clf.intercept_  # bias term (1 value)
weights = clf.coef_  # remaining weights (1 row x 2 cols)
w = np.concatenate([w0, weights.ravel()])
w

In [None]:
import matplotlib.pyplot as plt

def plot_points(data):
    """Scatter-plot the examples of a labelled 2D data set, one series per class.

    Parameters
    ----------
    data : array
        Examples as rows; the first two columns are used as the x and y
        coordinates and the last column is the class label (0 or 1).

    The class-1 examples are drawn first, then the class-0 examples, each
    with its own ``plt.scatter`` call. Nothing is returned.
    """
    labels = data[:, -1]
    for cls in (1, 0):
        members = data[labels == cls]  # rows whose label equals this class
        plt.scatter(members[:, 0], members[:, 1])

def plot_lines(min_x, max_x, w):
    """Plot the line ``w[0] + w[1]*x + w[2]*y = 0`` with pyplot.

    Parameters
    ----------
    min_x, max_x : float
        Range of x values over which a non-vertical line is drawn.
    w : sequence of 3 numbers
        ``w[0]`` is the bias, ``w[1]`` the weight of x and ``w[2]`` the
        weight of y.

    Behaviour
    ---------
    * ``w[2] != 0``: solves for y and plots 10 evenly spaced points of
      ``y = -w[0]/w[2] - x*w[1]/w[2]`` between ``min_x`` and ``max_x``.
    * ``w[2] == 0`` and ``w[1] != 0``: the line is vertical, so it cannot be
      written as y = f(x); it is drawn at ``x = -w[0]/w[1]`` with
      ``plt.axvline`` instead of dividing by zero.
    * ``w[1] == w[2] == 0``: the weights do not define a line; a warning is
      issued and nothing is drawn.

    Returns None.
    """
    if w[2] == 0:
        if w[1] == 0:
            import warnings  # local import: only needed on this rare path
            warnings.warn("plot_lines: w[1] and w[2] are both zero; no line to plot")
            return
        # Vertical line: x is constant, y is unconstrained.
        plt.axvline(-w[0] / w[1])
        return
    x = np.linspace(min_x, max_x, 10)
    y = -w[0]/w[2] - x*w[1]/w[2]
    plt.plot(x, y)

# Draw the labelled examples, then overlay the line given by the weight vector w.
plot_points(data)
# The x-range 0..1 is hard-coded; this presumably matches the range of the
# first feature in data1.txt — TODO confirm against the data.
plot_lines(0, 1, w)


#### Part B)