Discussion on Day 7: Pearson Correlation Coefficient I Challenge

Sort by

recency

|

104 Discussions

|

3 months ago+ 0 comments

Python solution:

import math 

def compute_pcc(x, y):
    """Return the Pearson correlation coefficient of two paired samples.

    The population form is used: the sum of the products of deviations
    from the means, divided by ``count * sd_x * sd_y``, where both
    standard deviations divide by ``count`` (not ``count - 1``).

    Args:
        x: sequence of numbers.
        y: sequence of numbers, paired element-wise with ``x``.

    Returns:
        The coefficient as a float.

    Raises:
        ValueError: if the samples are empty, differ in length, or either
            sample has zero variance (the coefficient is undefined then).
    """
    count = len(x)
    if count == 0 or count != len(y):
        raise ValueError("x and y must be non-empty and of equal length")

    mean_x = sum(x) / count
    mean_y = sum(y) / count
    dev_x = [xi - mean_x for xi in x]
    dev_y = [yi - mean_y for yi in y]

    sd_x = math.sqrt(sum(d * d for d in dev_x) / count)
    sd_y = math.sqrt(sum(d * d for d in dev_y) / count)
    if sd_x == 0 or sd_y == 0:
        raise ValueError("correlation is undefined for a constant sample")

    # The divisor is the real number of pairs, not the count declared on the
    # first input line, so the numerator and the deviations always agree.
    cross = sum(dx * dy for dx, dy in zip(dev_x, dev_y))
    return cross / (count * sd_x * sd_y)


if __name__ == "__main__":
    n = int(input())  # declared size; consumed so the data lines line up
    x = list(map(float, input().split()))
    y = list(map(float, input().split()))

    PCC = compute_pcc(x, y)

    print(round(PCC, 3))

2 years ago+ 0 comments

#python
from statistics import pstdev, mean
def pearson(xs, ys):
    """Return the population Pearson correlation coefficient of ``xs``/``ys``.

    Both means are computed once up front; the original recomputed them for
    every element inside the comprehension, which made the script quadratic.

    Args:
        xs: sequence of numbers.
        ys: sequence of numbers, paired element-wise with ``xs``.

    Returns:
        Sum of products of deviations divided by
        ``len(xs) * pstdev(xs) * pstdev(ys)``.

    Raises:
        ValueError: if the samples are empty or differ in length.
        ZeroDivisionError: if either sample has zero population standard
            deviation.
    """
    count = len(xs)
    if count == 0 or count != len(ys):
        raise ValueError("xs and ys must be non-empty and of equal length")

    mean_x = mean(xs)
    mean_y = mean(ys)
    cross = sum((xi - mean_x) * (yi - mean_y) for xi, yi in zip(xs, ys))
    return cross / (count * pstdev(xs) * pstdev(ys))


if __name__ == "__main__":
    n = int(input())  # declared size; the real list length is what is used
    X = list(map(float, input().split()))
    Y = list(map(float, input().split()))
    p = pearson(X, Y)
    print(round(p, 3))

2 years ago+ 0 comments

I am a bit confused about why the problem statement does not mention that the population standard deviation should be used instead of the sample standard deviation.

# Enter your code here. Read input from STDIN. Print output to STDOUT
import math
import statistics
# First input line: the declared number of observations.
n = int(input())

# Second and third input lines: the two whitespace-separated data sets.
X = [float(token) for token in input().split()]
Y = [float(token) for token in input().split()]
    

def calcpearsonCov(dataset1, dataset2):
    """Return the population Pearson correlation coefficient of two data sets.

    The result is the sum of the products of deviations from the means,
    divided by ``len(dataset1) * pstdev(dataset1) * pstdev(dataset2)``.

    Args:
        dataset1: sequence of numbers.
        dataset2: sequence of numbers, paired element-wise with ``dataset1``.

    Returns:
        The coefficient as a float.

    Raises:
        ValueError: if the two data sets differ in length.
        statistics.StatisticsError: if the data sets are empty.
        ZeroDivisionError: if either data set has zero population standard
            deviation.
    """
    # Use the data's own length rather than a module-level ``n`` so the
    # function does not depend on a global being defined and in sync.
    count = len(dataset1)
    if count != len(dataset2):
        raise ValueError("dataset1 and dataset2 must have the same length")

    averagex = statistics.mean(dataset1)
    averagey = statistics.mean(dataset2)
    cross = sum(
        (a - averagex) * (b - averagey) for a, b in zip(dataset1, dataset2)
    )
    stdevx = statistics.pstdev(dataset1)
    stdevy = statistics.pstdev(dataset2)
    return cross / (count * (stdevx * stdevy))
    
# Correlate the two data sets and print the value rounded to 3 decimals.
result = calcpearsonCov(dataset1=X, dataset2=Y)
print(round(result, ndigits=3))

2 years ago+ 0 comments

In R, it could be:

# Read the challenge input from standard input: the first line holds the
# declared count, the next two lines hold the two data sets.
stdin <- file('stdin')
open(stdin)
n <- as.integer(trimws(readLines(stdin, n = 1, warn = FALSE), which = "both"))
# Trim both ends and split on runs of whitespace so that leading or repeated
# spaces do not produce empty tokens (which as.numeric would turn into NA).
dataX <- as.numeric(strsplit(trimws(readLines(stdin, n = 1, warn = FALSE), which = "both"), "\\s+")[[1]])
dataY <- as.numeric(strsplit(trimws(readLines(stdin, n = 1, warn = FALSE), which = "both"), "\\s+")[[1]])
# Release the connection once all three lines have been read.
close(stdin)
correlation <- cor(dataX, dataY, method = 'pearson')
cat(round(correlation, 3))

2 years ago+ 0 comments

Easy Solution:

# import statistics as stat
from sys import stdin, stdout
import math

def mean(arr):
    """Return the arithmetic mean of ``arr`` (its sum divided by its length)."""
    total = sum(arr)
    return total / len(arr)

def sd(arr, mean):
    """Return the square root of the mean squared deviation of ``arr``.

    Args:
        arr: sequence of numbers.
        mean: the centre value that deviations are measured from.

    The squared deviations are divided by ``len(arr)``.
    """
    squared_deviations = [(value - mean) ** 2 for value in arr]
    return math.sqrt(sum(squared_deviations) / len(arr))

def covariance(X, Y):
    """Return the covariance of ``X`` and ``Y``, dividing by ``len(X)``.

    Each element of ``X`` is paired with the element of ``Y`` at the same
    index; deviations are taken from the mean of each sequence.
    """
    centre_x = mean(X)
    centre_y = mean(Y)
    products = [
        (x_value - centre_x) * (Y[index] - centre_y)
        for index, x_value in enumerate(X)
    ]
    return sum(products) / len(X)

def pearson_correlation(X, Y):
    """Return the covariance of ``X`` and ``Y`` divided by the product of
    their standard deviations, each taken about the sequence's own mean."""
    numerator = covariance(X, Y)
    denominator = sd(X, mean(X)) * sd(Y, mean(Y))
    return numerator / denominator

# First line: the declared size (parsed, but not used below).
n = int(stdin.readline().strip())
# Next two lines: the whitespace-separated data sets.
X = [float(token) for token in stdin.readline().strip().split()]
Y = [float(token) for token in stdin.readline().strip().split()]

if len(X) == len(Y):
    t = pearson_correlation(X, Y)
    print(round(t, 3))
else:
    print("Error: Data sets X and Y must have equal lengths")

Sort by

|

104 Discussions

|

Cookie support is required to access HackerRank