Sort by

recency

|

104 Discussions

|

  • + 0 comments

    Python solution:

    import math 
    
    # Input: the declared number of points, then the X values and the Y values,
    # each on its own whitespace-separated line.
    n = int(input())
    x = [float(token) for token in input().split()]
    y = [float(token) for token in input().split()]
    lx, ly = len(x), len(y)

    meanX = sum(x) / lx
    meanY = sum(y) / ly

    # Population standard deviations: the squared deviations are averaged over
    # the number of values read, not over (count - 1).
    sdX = math.sqrt(sum(math.pow(xi - meanX, 2) for xi in x) / lx)
    sdY = math.sqrt(sum(math.pow(yi - meanY, 2) for yi in y) / ly)

    # Numerator of the Pearson coefficient: the sum of paired co-deviations.
    co_deviation_total = sum((xi - meanX) * (yi - meanY) for xi, yi in zip(x, y))
    PCC = co_deviation_total / (n * sdX * sdY)

    print(round(PCC, 3))
    
  • + 0 comments
    #python
    from statistics import pstdev, mean
    # Input: the declared number of points, then the X values and the Y values,
    # each on its own whitespace-separated line.
    n = int(input())
    X = list(map(float, input().split()))
    Y = list(map(float, input().split()))

    # Compute each mean exactly once. Calling mean() inside the comprehension
    # would rescan the whole list for every element (quadratic work).
    mean_x = mean(X)
    mean_y = mean(Y)

    # Per-point products of the deviations from the means.
    res = [(X[i] - mean_x) * (Y[i] - mean_y) for i in range(n)]

    # Pearson coefficient: summed co-deviations over n times the product of the
    # population standard deviations.
    p = sum(res) / (n * pstdev(X) * pstdev(Y))
    print(round(p, 3))
    
  • + 0 comments

    I am a bit confused about why the problem statement does not mention that the population standard deviation should be used instead of the sample standard deviation.

    # Enter your code here. Read input from STDIN. Print output to STDOUT
    import math
    import statistics
    # Input: the declared number of points, then the two datasets, each given
    # as one line of whitespace-separated numbers.
    n = int(input())
    X = [float(token) for token in input().split()]
    Y = [float(token) for token in input().split()]
        
    
    def calcpearsonCov(dataset1, dataset2):
        """Return the Pearson correlation coefficient of two datasets.

        The coefficient is the sum of the paired deviations from the means,
        divided by the number of points times the product of the population
        standard deviations (``statistics.pstdev``).

        Args:
            dataset1: Sequence of numbers (the X values).
            dataset2: Sequence of numbers (the Y values), same length as
                ``dataset1``.

        Returns:
            The correlation coefficient as a float.

        Raises:
            ValueError: If the two datasets have different lengths.
            statistics.StatisticsError: If the datasets are empty.
            ZeroDivisionError: If either dataset has zero standard deviation.
        """
        # Use the size of the arguments themselves rather than a module-level
        # count, so the function works on any pair of datasets.
        size = len(dataset1)
        if size != len(dataset2):
            raise ValueError("dataset1 and dataset2 must have the same length")

        averagex = statistics.mean(dataset1)
        averagey = statistics.mean(dataset2)
        co_deviation_total = sum(
            (a - averagex) * (b - averagey) for a, b in zip(dataset1, dataset2)
        )
        stdevx = statistics.pstdev(dataset1)
        stdevy = statistics.pstdev(dataset2)
        return co_deviation_total / (size * (stdevx * stdevy))
        
    # Compute the coefficient for the two datasets read above and print it
    # rounded to three decimal places.
    coefficient = calcpearsonCov(X, Y)
    print(round(coefficient, 3))
    
        
            
    
  • + 0 comments

    In R, this could be:

    # Open standard input as a connection so lines can be read one at a time.
    con <- file('stdin')
    open(con)

    # Read the next single line from the input connection.
    next_line <- function() readLines(con, n = 1, warn = FALSE)

    # Strip trailing whitespace, split on single spaces and convert to numbers.
    parse_numbers <- function(line) {
      tokens <- strsplit(trimws(line, which = "right"), " ")[[1]]
      as.numeric(tokens)
    }

    # First line: the declared number of points; then the X and Y values.
    n <- as.integer(trimws(next_line(), which = "both"))
    dataX <- parse_numbers(next_line())
    dataY <- parse_numbers(next_line())

    # Pearson correlation via the built-in cor(), printed to three decimals.
    correlation <- cor(dataX, dataY, method = 'pearson')
    cat(round(correlation, 3))
    
  • + 0 comments

    Easy Solution:

    # import statistics as stat
    from sys import stdin, stdout
    import math
    
    def mean(arr):
        """Return the arithmetic mean of the values in ``arr``.

        Raises ``ZeroDivisionError`` when ``arr`` is empty.
        """
        total = sum(arr)
        count = len(arr)
        return total / count
    
    def sd(arr, mean):
        """Return the population standard deviation of ``arr`` around ``mean``.

        ``mean`` is the precomputed centre value; the squared deviations are
        averaged over ``len(arr)`` before taking the square root.
        """
        squared_deviations = [(value - mean) ** 2 for value in arr]
        return math.sqrt(sum(squared_deviations) / len(arr))
    
    def covariance(X, Y):
        """Return the population covariance of ``X`` and ``Y``.

        The paired deviations from each mean are multiplied, summed and divided
        by ``len(X)``. ``Y`` is indexed by position, so it must provide at
        least ``len(X)`` values.
        """
        centre_x = mean(X)
        centre_y = mean(Y)
        products = ((x_val - centre_x) * (Y[idx] - centre_y) for idx, x_val in enumerate(X))
        return sum(products) / len(X)
    
    def pearson_correlation(X, Y):
        """Return the Pearson correlation coefficient of ``X`` and ``Y``.

        Computed as the covariance divided by the product of the two
        population standard deviations.
        """
        numerator = covariance(X, Y)
        spread_x = sd(X, mean(X))
        spread_y = sd(Y, mean(Y))
        return numerator / (spread_x * spread_y)
    
    # Input: the declared number of points, then the X values and the Y values,
    # each on its own whitespace-separated line.
    n = int(stdin.readline().strip())
    X = [float(token) for token in stdin.readline().strip().split()]
    Y = [float(token) for token in stdin.readline().strip().split()]

    # Only correlate datasets of equal length; otherwise report the mismatch.
    if len(X) == len(Y):
        print(round(pearson_correlation(X, Y), 3))
    else:
        print("Error: Data sets X and Y must have equal lengths")