Wednesday, 11 August 2021

Pandas and CSV

 Hi all,

Create a CSV file named hup1.csv with the contents given below (the first line is the header row):

No,Name,Place

1,HUP,Kollam

2,Abc,Test

3,Raj,Klm


#Program 1

import pandas as pd

# Load hup1.csv into a DataFrame and print the complete table
# (to_string() renders every row and column without truncation).
table = pd.read_csv('hup1.csv')
rendered = table.to_string()
print(rendered)


#Program 2

import pandas as pd

# Print the "No" and "Name" columns of hup1.csv, one row per line.
#
# read_csv() treats the first line of the file as the header, so columns
# must be addressed by their header names ("No", "Name", "Place").
# Indexing with values taken from the first data row ('1', 'HUP') raises
# KeyError when the header line is present.
df = pd.read_csv('hup1.csv')
for number, name in zip(df['No'], df['Name']):
    print(number, name)

Monday, 10 May 2021

Remove stop words and predict using Naive Bayes Classifier

 Hi all,

Use the code below to train a Naive Bayes classifier (NBC) on sentences from which the stop words have been removed.

-------------------------------------------------------------------------------

from nltk import NaiveBayesClassifier as nbc


from nltk.tokenize import word_tokenize


from itertools import chain


import csv
from gensim.parsing.preprocessing import remove_stopwords

from nltk.tokenize import word_tokenize



# Read the training rows and strip stop words from the first column
# (the sentence) of every row. The cleaned rows are collected in
# `training_data`; `rownum` ends up holding the number of rows kept.
#
# newline='' is what the csv module asks for so that newlines embedded in
# quoted fields are parsed correctly.
training_data = []

with open('trainingdata.csv', 'r', newline='') as csvinput:
    reader = csv.reader(csvinput, delimiter=",")

    for row in reader:
        # csv.reader yields [] for blank lines; row[0] would fail on those.
        if not row:
            continue

        old = row[0]
        sent = remove_stopwords(old)
        row[0] = sent

        training_data.append(row)
        print('hup original ', old)
        print('hup new ', sent)
        print('----------------')

rownum = len(training_data)



# Build the bag-of-words features and train the Naive Bayes classifier.
#
# Each training row is unpacked as (sentence, tag), so rows must have
# exactly two columns. Every sentence is tokenised exactly once and kept
# as a set, so the per-word membership tests below do not re-tokenise the
# sentence for each vocabulary entry.
tokenized_training = [
    (set(word_tokenize(sentence.lower())), tag)
    for sentence, tag in training_data
]

# Every distinct lower-cased token seen in the training sentences.
vocabulary = set(chain.from_iterable(tokens for tokens, _ in tokenized_training))

# One ({word: is_present}, tag) pair per training sentence.
feature_set = [
    ({word: (word in tokens) for word in vocabulary}, tag)
    for tokens, tag in tokenized_training
]

classifier = nbc.train(feature_set)



# Tag every sentence in testdata.csv with the trained classifier and write
# the rows, each with the predicted tag appended as an extra column, to
# data.csv. The first row of testdata.csv is treated as a header and is
# not copied to the output. The sentence is read from the second column.
with open('testdata.csv', 'r', newline='') as csvinput, \
        open('data.csv', 'w') as csvoutput:
    writer = csv.writer(csvoutput, lineterminator='\n')
    reader1 = csv.reader(csvinput)

    tagged_rows = []

    # Skip the header row; the default keeps an empty file from raising
    # StopIteration.
    next(reader1, None)

    for row in reader1:
        # csv.reader yields [] for blank lines; there is nothing to tag.
        if not row:
            continue

        test_sentence = row[1]

        # Tokenise once, then test each vocabulary word against the set.
        tokens = set(word_tokenize(test_sentence.lower()))
        featurized_test_sentence = {word: (word in tokens) for word in vocabulary}

        # Classify once and reuse the result for both printing and output.
        tag = classifier.classify(featurized_test_sentence)

        print("test_sent:", test_sentence)
        print("tag:", tag)

        row.append(tag)
        tagged_rows.append(row)

    writer.writerows(tagged_rows)