Hi all,
Qn) How do I encode text as numeric features using fit_transform?
Ans)
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
# Load the tab-separated dataset, decoding the file as ISO-8859-1.
df = pd.read_csv("hupassg.tsv", sep='\t', encoding='ISO-8859-1')

# Bag-of-words encoder: counts 1- to 3-word n-grams, drops English stop
# words, and caps the vocabulary at 10,000 terms.
vectorizer1 = CountVectorizer(
    max_features=10000,
    ngram_range=(1, 3),
    stop_words='english',
)

# fit_transform learns the vocabulary from the text column and returns the
# document-term count matrix in a single step.
count_vector1 = vectorizer1.fit_transform(df['clean_assg'])
feature_names1 = vectorizer1.get_feature_names_out()

# Work on a copy of the columns of interest so `df` itself is left untouched.
data1 = df[['assg_set', 'clean_assg', 'final_score']].copy()

# Convert the count matrix to a dense NumPy array for use as the feature
# matrix. The variable must be `count_vector1` (as assigned above); the
# misspelling `count_vectors1` raises NameError.
X = count_vector1.toarray()
y = data1['final_score'].to_numpy()

# Hold out 30% of the rows for testing. No random_state is set, so the split
# differs from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)