Friday, 1 December 2023

Precision, Recall and F1-Score

 

import seaborn as sns

import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

from sklearn.neighbors import KNeighborsClassifier

from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score


# Separate the feature columns from the 'Outcome' target column, then make a
# stratified, reproducible train/test split.
features = diabetes.loc[:, diabetes.columns != 'Outcome']
target = diabetes['Outcome']
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    stratify=target,
    random_state=66,
)

# Build the 9-nearest-neighbours classifier and fit it on the training part.
knn = KNeighborsClassifier(n_neighbors=9)
knn.fit(X_train, y_train)

# Report accuracy on both parts of the split.
train_accuracy = knn.score(X_train, y_train)
test_accuracy = knn.score(X_test, y_test)
print('Accuracy of K-NN classifier on training set: {:.2f}'.format(train_accuracy))
print('Accuracy of K-NN classifier on test set: {:.2f}'.format(test_accuracy))

# Predict the test labels and tabulate them against the true labels.
y_pred = knn.predict(X_test)
conf_matrix = confusion_matrix(y_test, y_pred)
print('\nConfusion Matrix:\n', conf_matrix)

# Derive precision, recall and F1 from the same predictions.
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print('\nPrecision: {:.2f}'.format(precision))
print('Recall: {:.2f}'.format(recall))
print('F1 Score: {:.2f}'.format(f1))

# Draw the confusion matrix as an annotated seaborn heatmap.
class_labels = ['No Diabetes', 'Diabetes']
plt.figure(figsize=(8, 6))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_labels,
    yticklabels=class_labels,
)
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix')
plt.show()


Confusion Matrix - Graphical

 


import seaborn as sns

import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

from sklearn.neighbors import KNeighborsClassifier

from sklearn.metrics import confusion_matrix


# Everything except the 'Outcome' column is used as input; 'Outcome' is the
# label. The split is stratified on the label and seeded for repeatability.
label_column = diabetes['Outcome']
input_columns = diabetes.loc[:, diabetes.columns != 'Outcome']
X_train, X_test, y_train, y_test = train_test_split(
    input_columns, label_column, stratify=label_column, random_state=66
)

# Fit a K-NN classifier that votes among the 9 nearest neighbours.
knn = KNeighborsClassifier(n_neighbors=9)
knn.fit(X_train, y_train)

# Show the accuracy on the training data and on the held-out test data.
accuracy_train = knn.score(X_train, y_train)
print('Accuracy of K-NN classifier on training set: {:.2f}'.format(accuracy_train))
accuracy_test = knn.score(X_test, y_test)
print('Accuracy of K-NN classifier on test set: {:.2f}'.format(accuracy_test))

# Compare predicted and actual test labels in a confusion matrix.
y_pred = knn.predict(X_test)
conf_matrix = confusion_matrix(y_test, y_pred)
print('\nConfusion Matrix:\n', conf_matrix)

# Plot the confusion matrix as a heatmap with the counts written in the cells.
tick_names = ['No Diabetes', 'Diabetes']
plt.figure(figsize=(8, 6))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=tick_names,
    yticklabels=tick_names,
)
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix')
plt.show()


Confusion Matrix

 


from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix

# 'Outcome' is the target; all remaining columns are the features.
y_all = diabetes['Outcome']
X_all = diabetes.loc[:, diabetes.columns != 'Outcome']
X_train, X_test, y_train, y_test = train_test_split(
    X_all,
    y_all,
    stratify=y_all,
    random_state=66,
)

# Train a K-NN classifier with k = 9.
knn = KNeighborsClassifier(n_neighbors=9)
knn.fit(X_train, y_train)

# Accuracy on the data the model has seen, then on the data it has not.
score_on_train = knn.score(X_train, y_train)
score_on_test = knn.score(X_test, y_test)
print('Accuracy of K-NN classifier on training set: {:.2f}'.format(score_on_train))
print('Accuracy of K-NN classifier on test set: {:.2f}'.format(score_on_test))

# Confusion matrix of true test labels versus predicted labels.
y_pred = knn.predict(X_test)
conf_matrix = confusion_matrix(y_test, y_pred)
print('\nConfusion Matrix:\n', conf_matrix)

Wednesday, 12 April 2023

Apriori Algorithm

 Hi all

Apriori Algorithm

 import numpy as nm

import matplotlib.pyplot as mtp  
import pandas as pd 
from apyori import apriori  
import urllib.request

from urllib.parse import quote_plus

# PHP endpoint that receives each discovered pair as the `item1` / `item2`
# query parameters.
url = "http://localhost/web/hupaddpairs.php?item1="

# NOTE(review): read_csv treats the first line as a header row by default;
# pass header=None if item_set1.csv has no header line - confirm.
dataset = pd.read_csv('item_set1.csv')

# One transaction per CSV row. Every row and column of the file is used
# (instead of a hard-coded 32 x 5 window, which raised IndexError on smaller
# files). Empty cells are skipped so that the literal string 'nan' is not
# treated as a purchasable item, and surrounding whitespace is stripped.
transactions = [
    [str(cell).strip() for cell in row if pd.notna(cell)]
    for row in dataset.values
]

rules = apriori(
    transactions=transactions,
    min_support=0.003,
    min_confidence=0.2,
    min_lift=3,
    min_length=2,
    max_length=2,
)
results = list(rules)

for record in results:
    # The confidence and lift printed below belong to this specific directed
    # rule, so the rule's two sides are taken from it rather than from the
    # unordered item set. With max_length=2 and min_lift > 1 each side holds
    # exactly one item.
    stat = record.ordered_statistics[0]
    (antecedent,) = stat.items_base
    (consequent,) = stat.items_add

    print("Rule: " + antecedent + " -> " + consequent)
    print("Support: " + str(record.support))
    print("Confidence: " + str(stat.confidence))
    print("Lift: " + str(stat.lift))
    print("=====================================")

    # Percent-encode the item names so that characters such as spaces or '&'
    # cannot corrupt the query string.
    vals = url + quote_plus(antecedent) + "&item2=" + quote_plus(consequent)
    print(vals)

    # Send the pair to the PHP script; the context manager closes the
    # connection once the response has been read.
    with urllib.request.urlopen(vals) as webUrl:
        webUrl.read()

-------------------
Data
item_set1.csv

electronics.smartphone,	electronics.video.tv			
electronics.smartphone,	electronics.video.tv,appliances.kitchen.washer		
electronics.smartphone,	electronics.audio.headphone			
electronics.audio.headphone,electronics.smartphone	appliances.environment.vacuum,kids.skates	

Call a php program from Python

 Dear all

# `import urllib` alone does not load the `request` submodule, so
# `urllib.request.urlopen` would raise AttributeError; import it explicitly.
import urllib.request

# The numeric code is appended to this URL as the `code` query parameter.
url = "http://localhost/huphand/huphand.php?code="

# `i` is expected to have been assigned by earlier code; it is advanced by one
# before each request.
i = i + 1

url = url + str(i)

# Issue the GET request that triggers the PHP program.
webUrl = urllib.request.urlopen(url)

Sunday, 2 April 2023

How to load model using keras in Python

 qn) How to load model using keras in Python

Ans)

from keras.models import load_model

# Restore the model that was previously written with model.save().
modelmain = load_model("hup_bacteria_lstm.h5")

# summary() prints the layer table itself and returns None, so wrapping it in
# print() only added a stray "None" line.
modelmain.summary()

How to save model using keras in Python

 Qn) How to save model using keras in Python

Ans)

# Write the model held in `model` to an HDF5 (.h5) file.
model_path = "hup_bacteria_lstm.h5"
model.save(model_path)

Show image using cv2

 Qn) How to show image using cv2 in Python

Ans)

import cv2
from google.colab.patches import cv2_imshow

image_path = 'Bacteroides.fragilis/Bacteroides.fragilis_0001.tif'

# cv2.imread does not raise on a missing or unreadable file; it returns None.
hupimg = cv2.imread(image_path)
if hupimg is None:
    raise FileNotFoundError('Could not read image: ' + image_path)

# Colab replacement for cv2.imshow, which cannot open a window in a notebook.
cv2_imshow(hupimg)

Friday, 31 March 2023

How to encode text to numeric using fit_transform

 Hi all,

Qn) How to encode text to numeric using fit_transform

Ans)

import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split

# Tab-separated input; the text column is 'clean_assg' and the target column
# is 'final_score'.
df = pd.read_csv("hupassg.tsv", sep='\t', encoding='ISO-8859-1')

# ngram_range must be a (min_n, max_n) tuple; (1, 3) counts unigrams, bigrams
# and trigrams. The vocabulary is capped at the 10000 most frequent terms and
# English stop words are removed.
vectorizer1 = CountVectorizer(max_features=10000, ngram_range=(1, 3), stop_words='english')

# fit_transform learns the vocabulary and returns the sparse count matrix.
count_vector1 = vectorizer1.fit_transform(df['clean_assg'])
feature_names1 = vectorizer1.get_feature_names_out()

data1 = df[['assg_set', 'clean_assg', 'final_score']].copy()

# Dense feature matrix and target vector for the downstream model.
X = count_vector1.toarray()
y = data1['final_score'].to_numpy()

# Hold out 30% of the rows for testing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

Save and load model in python using pickle

 Dear all,

Qn) How to save and load a model using Pickle in Python

Ans)

import pickle

filename = 'hupscoringsvm.sav'

# Serialize the trained model; the `with` block guarantees the file is flushed
# and closed even if pickling fails.
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)

--------
import pickle

filename = 'hupscoringsvm.sav'

# SECURITY: unpickling can execute arbitrary code, so only load files that you
# created yourself or that come from a trusted source.
with open(filename, 'rb') as model_file:
    loaded_model = pickle.load(model_file)

# Use the restored model exactly like the original one.
y_pred = loaded_model.predict(X_test)
print(y_pred)


Save Feature using Pickle

 Dear all,

Qn. How can we save features using pickle in python

Ans. 

import pickle

from sklearn.model_selection import train_test_split

# Save the feature matrix and the labels. The file name and the mode are two
# separate arguments to open(); `with` closes each file after use.
with open('X.pkl', 'wb') as features_file:
    pickle.dump(X, features_file)
with open('y.pkl', 'wb') as labels_file:
    pickle.dump(y, labels_file)

# Load them back (only unpickle files from a trusted source).
with open('X.pkl', 'rb') as features_file:
    X = pickle.load(features_file)
with open('y.pkl', 'rb') as labels_file:
    y = pickle.load(labels_file)

# And split to make testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

Wednesday, 11 January 2023

Machine Learning Viva questions

 Dear All,

1. Why ML

By default machines or computers can't learn. So we can make computers learn with the help of ML algorithms

2. what is a model

A model can learn and answer your questions. Human brain is the best example. 


3. What is training data

data used for learning.

4. what is test data

The data used to test the model for finding the efficiency of the model

5. what is labelled data

data with the answer or label

6. What is accuracy

How accurate is the model for test data

eg 90% is good

40% is poor

To calculate the accuracy we need testdata with answer/label/key/tag


7. What are csv files

Comma Separated Values: a portable data format that works across different operating systems.

It can be opened using Excel too.


8. Important packages in ML

pandas, numpy, matplotlib


9. use of pandas

Reading and processing csv files


10. use of numpy

Advanced numerical computations 


11. use of matplotlib

visualization using graphs, charts etc


12. What do X and y indicate in ML


X - indicates Data part

y- indicates label part


13. X_train, X_test, y_train, y_test = train_test_split(diabetes.loc[:, diabetes.columns != 'Outcome'], diabetes['Outcome'], stratify=diabetes['Outcome'], random_state=66)


diabetes.loc[:, diabetes.columns != 'Outcome'] - here ':' selects all rows, and diabetes.columns != 'Outcome' is a True/False mask that selects every column except 'Outcome'. The result is the data part - X


diabetes['Outcome'] - indicates the label part which is y


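random_state=66 is the seed for the random shuffling done before splitting, so the same split is produced on every run; it is not a percentage. Since test_size is not given, the default is used: test data will be 25% of the total data and training data 75%. stratify=diabetes['Outcome'] keeps the proportion of each class the same in both parts.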


14. from sklearn.neighbors import KNeighborsClassifier


sklearn is the package which contains ML algorithms

Here KNeighborsClassifier is imported from neighbors subpackage of sklearn


15. knn = KNeighborsClassifier(n_neighbors=9)

Create a variable corresponding to the algorithm KNeighborsClassifier with model parameter n_neighbors with value 9


16. What is model parameter

For each algorithm there can be some parameters or settings


17.knn.fit(X_train, y_train)

Training the model with data (X_train) and label/answer(y_train)


18.knn.score(X_test, y_test)


This is the testing process for calculating the score/accuracy of the model

step1 - Testing the model with unknown data X_test

step2- the result obtained from step1 is y_pred

step3- compare y_pred with y_test 


19. why we calculate accuracy

we are calculating accuracy of a model for its deployment/applicability.

if we have decent accuracy then we can apply in real world applications.


20. deploy

# A single unknown sample, written as a list containing one row of values.
new_data = [
    [4, 111, 92, 0, 0, 36.6, 0.190, 31],
]

# Ask the trained model for the answer to the unknown sample.
ans = knn.predict(new_data)


Here unknown data is stored in variable 'new_data'.

The 'predict()' function tests the data with the model, and the result is stored in the variable 'ans'.


21. Full cycle


1. Download data and upload to colab

2. Split the data to train and test

3. Create a model variable

4. Train the model with training data

5. Find accuracy of the model using score function.

6. Deploy the model with unknown data using predict function and show the answer to user.










KNN Classifier using Python

  HI

Using KNN classifier in Python we can do classification of data.

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

da = pd.read_csv("hup.csv")

# 'Activity' is the label column; every other column is used as a feature.
# The split is stratified on the label and seeded for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    da.loc[:, da.columns != 'Activity'],
    da['Activity'],
    stratify=da['Activity'],
    random_state=66,
)

# 1-nearest-neighbour classifier.
knn = KNeighborsClassifier(n_neighbors=1)
knn.fit(X_train, y_train)

print('KNN accuracy with training data {:.2f}'.format(knn.score(X_train, y_train)))
print('Accuracy Using test data: {:.2f}'.format(knn.score(X_test, y_test)))

Read from one csv file and write to new csv file

 Hi,

We can use pandas for reading csv file.

Using csv package we can write to another csv file.

import csv

import pandas as pd

hupactivity = pd.read_csv('huptest.csv')

# Iterating a DataFrame directly yields its column names, not its rows, so the
# rows are extracted explicitly as plain lists.
rows = hupactivity.values.tolist()

# newline='' is what the csv module expects for files it writes; `with` closes
# the file so that all rows are flushed to disk.
with open('data.csv', 'w', newline='') as csvoutput:
    writer = csv.writer(csvoutput, lineterminator='\n')

    # Keep the column names as the first line of the new file.
    writer.writerow(list(hupactivity.columns))

    for row in rows:
        print(row)
    writer.writerows(rows)

Use Google Drive in Google colab

 Hi All,

While analysing data using Google Colab, it is better to use Google Drive, as the data will be permanently saved in Drive.

import os

from google.colab import drive

# Mount Google Drive into the Colab file system (asks for authorization).
drive.mount('/content/drive')

# Make the Drive folder the working directory. os.chdir is plain Python, so it
# works the same whether or not IPython's automagic `cd` is available.
os.chdir('/content/drive/MyDrive/HUPHealth')

Here HUPHealth is your folder in Drive. You may change according to your choice.