We use cookies to ensure you have the best browsing experience on our website. Please read our cookie policy for more information about how we use cookies.
import sys
import math
from collections import defaultdict
def train_naive_bayes(X_train, y_train):
    """Collect the counts used by the Naive Bayes predictor.

    Every feature value is treated as a discrete category: the counts are
    keyed by the exact ``(feature_index, value)`` pair seen in training.

    Args:
        X_train: Sequence of feature vectors; each vector is an iterable of
            hashable values.
        y_train: Sequence of class labels indexed in parallel with
            ``X_train``; it needs at least ``len(X_train)`` entries.

    Returns:
        A ``(class_count, feature_count)`` tuple where ``class_count[label]``
        is the number of training rows with that label and
        ``feature_count[label][(j, val)]`` is the number of rows with that
        label whose ``j``-th feature equals ``val``.
    """
    class_count = defaultdict(int)
    feature_count = defaultdict(lambda: defaultdict(int))
    for i, row in enumerate(X_train):
        label = y_train[i]
        class_count[label] += 1
        for j, val in enumerate(row):
            feature_count[label][j, val] += 1
    return class_count, feature_count
def predict_naive_bayes(X_test, class_count, feature_count):
    """Predict a class label for every feature vector in ``X_test``.

    Each label is scored in log space as the log prior plus the sum of the
    add-one smoothed log likelihoods of the observed ``(index, value)``
    pairs; the best-scoring label wins.

    Args:
        X_test: Iterable of feature vectors (sequences, since ``len`` is
            taken of each one).
        class_count: Mapping ``label -> number of training rows``.
        feature_count: Mapping ``label -> {(index, value): count}``.

    Returns:
        A list with one predicted label per vector, in input order. On a tie
        the label that comes first when iterating ``class_count`` is chosen.
    """
    predictions = []
    total_docs = sum(class_count.values())
    for x in X_test:
        probs = {}
        for label in class_count:
            label_total = class_count[label]
            # .get keeps a defaultdict argument from growing during lookup.
            seen = feature_count.get(label, {})
            # NOTE(review): the smoothing denominator adds the vector length,
            # not the number of distinct values per feature; kept as written.
            denominator = label_total + len(x)
            prob_label = math.log(label_total / total_docs)
            prob_features = 0
            for j, val in enumerate(x):
                feature_prob = (seen.get((j, val), 0) + 1) / denominator
                prob_features += math.log(feature_prob)
            probs[label] = prob_label + prob_features
        # Assign the class with the highest log probability.
        predictions.append(max(probs, key=probs.get))
    return predictions
def _parse_features(tokens, num_features):
    """Expand sparse ``index:value`` tokens into a dense list of floats.

    Indices in the tokens are 1-based; positions not mentioned stay ``0.0``.
    """
    features = [0.0] * num_features
    for token in tokens:
        idx, val = token.split(':')
        features[int(idx) - 1] = float(val)
    return features


# The guard must compare the dunder names; the bare ``name == 'main'`` form
# raises NameError as soon as the script is run.
if __name__ == '__main__':
    input_data = sys.stdin.read().splitlines()
    N, M = map(int, input_data[0].split())

    # Training rows: "<id> <+1|-1> <index>:<value> ..."
    X_train = []
    y_train = []
    for line in input_data[1:N + 1]:
        parts = line.split()
        y_train.append(1 if parts[1] == '+1' else -1)
        X_train.append(_parse_features(parts[2:], M))

    # Query rows: "<id> <index>:<value> ..."
    q = int(input_data[N + 1])
    query_ids = []
    X_test = []
    for line in input_data[N + 2:N + 2 + q]:
        parts = line.split()
        query_ids.append(parts[0])
        X_test.append(_parse_features(parts[1:], M))

    # Train the Naive Bayes model
    class_count, feature_count = train_naive_bayes(X_train, y_train)

    # Predict the results
    predictions = predict_naive_bayes(X_test, class_count, feature_count)

    # Print one "<id> <label>" line per query, in input order.
    for query_id, predicted in zip(query_ids, predictions):
        print(f"{query_id} {'+1' if predicted == 1 else '-1'}")
Cookie support is required to access HackerRank
It seems that cookies are disabled in this browser. Please enable them to open this website.
Quora Answer Classifier
You are viewing a single comment's thread. Return to all comments →
import sys import math from collections import defaultdict
def train_naive_bayes(X_train, y_train): # Count the number of instances for each class (+1, -1) class_count = defaultdict(int) feature_count = defaultdict(lambda: defaultdict(int))
def predict_naive_bayes(X_test, class_count, feature_count): predictions = [] total_docs = sum(class_count.values())
if name == 'main': input_data = sys.stdin.read().splitlines()