# Multinomial Naive Bayes classifier with Laplace smoothing.
# Reads sparse-feature training rows and query rows from stdin,
# predicts a +1/-1 label for each query, and prints the results.

import math
import sys
from collections import defaultdict

    def train_naive_bayes(X_train, y_train): # Count the number of instances for each class (+1, -1) class_count = defaultdict(int) feature_count = defaultdict(lambda: defaultdict(int))

    for i in range(len(X_train)):
        label = y_train[i]
        class_count[label] += 1
        for j, val in enumerate(X_train[i]):
            feature_count[label][j, val] += 1
    
    return class_count, feature_count
    

    def predict_naive_bayes(X_test, class_count, feature_count): predictions = [] total_docs = sum(class_count.values())

    for x in X_test:
        probs = {}
    
        for label in class_count:
            prob_label = math.log(class_count[label] / total_docs)
            prob_features = 0
    
            for j, val in enumerate(x):
                feature_prob = (feature_count[label].get((j, val), 0) + 1) / (class_count[label] + len(x))
                prob_features += math.log(feature_prob)
    
            probs[label] = prob_label + prob_features
    
        # Assign the class with the highest probability
        predicted_label = max(probs, key=probs.get)
        predictions.append(predicted_label)
    
    return predictions
    

    if name == 'main': input_data = sys.stdin.read().splitlines()

    N, M = map(int, input_data[0].split())
    
    X_train = []
    y_train = []
    
    # Read the training data
    for i in range(1, N + 1):
        parts = input_data[i].split()
        label = 1 if parts[1] == '+1' else -1
        features = [0.0] * M
        for feat in parts[2:]:
            idx, val = feat.split(':')
            features[int(idx) - 1] = float(val)
        X_train.append(features)
        y_train.append(label)
    
    # Read query data
    q = int(input_data[N + 1])
    query_ids = []
    X_test = []
    
    for i in range(N + 2, N + 2 + q):
        parts = input_data[i].split()
        query_ids.append(parts[0])
        features = [0.0] * M
        for feat in parts[1:]:
            idx, val = feat.split(':')
            features[int(idx) - 1] = float(val)
        X_test.append(features)
    
    # Train the Naive Bayes model
    class_count, feature_count = train_naive_bayes(X_train, y_train)
    
    # Predict the results
    predictions = predict_naive_bayes(X_test, class_count, feature_count)
    
    # Print the output
    for i in range(q):
        print(f"{query_ids[i]} {'+1' if predictions[i] == 1 else '-1'}")