You are viewing a single comment's thread. Return to all comments →
import pandas as pd
data = pd.read_csv("http://s3.amazonaws.com/hr-testcases/597/assets/trainingdata.txt")
data['target'] = data['5485'].astype(str).str[0]
data.target = data.target.astype(int)
data.rename(columns={"5485": "text",}, inplace = True)
for txt in data.text: txt = txt[1:]
data['text'] = data['text'].str[1:]
from sklearn.feature_extraction.text import CountVectorizer
md= CountVectorizer()
cvTrain = md.fit_transform(data.text)
from sklearn.naive_bayes import MultinomialNB clf = MultinomialNB()
clf.fit(cvTrain,data.target)
t= int(input()) lines = [] for i in range(t): line= input()
lines.append(line)
test = md.fit_transform(lines) pred = clf.predict(test) for i in pred: print(pred[0])
Seems like cookies are disabled on this browser, please enable them to open this website
Document Classification
You are viewing a single comment's thread. Return to all comments →
Please someone help its showing time out error
import pandas as pd
data = pd.read_csv("http://s3.amazonaws.com/hr-testcases/597/assets/trainingdata.txt")
data['target'] = data['5485'].astype(str).str[0]
data.target = data.target.astype(int)
data.rename(columns={"5485": "text",}, inplace = True)
for txt in data.text: txt = txt[1:]
data['text'] = data['text'].str[1:]
from sklearn.feature_extraction.text import CountVectorizer
md= CountVectorizer()
cvTrain = md.fit_transform(data.text)
from sklearn.naive_bayes import MultinomialNB
clf = MultinomialNB()
clf.fit(cvTrain,data.target)
t= int(input()) lines = [] for i in range(t): line= input()
test = md.fit_transform(lines) pred = clf.predict(test)
for i in pred: print(pred[0])