Discussion on Build a Stack Exchange Scraper Challenge

Sort by

recency

|

108 Discussions

|

1 week ago+ 0 comments

import re
import sys

# One match per question, capturing three groups in page order:
#   1. the numeric id right after href="/questions/
#   2. the link text following class="question-hyperlink">
#   3. the text following class="relativetime">
# The pattern is applied without re.DOTALL, so the page must first be
# collapsed onto a single line (see join_lines) for `.*?` to bridge the gaps.
regex = r'href=\"\/questions\/(\d+).*?class=\"question-hyperlink\">([^<]+)<.*?class="relativetime">([^<]+)<'


def join_lines(lines):
    """Return all *lines* concatenated, with trailing newlines removed.

    Args:
        lines: an iterable of strings, e.g. ``sys.stdin``.

    Returns:
        A single string with no newline between the original lines.
    """
    # A single join is linear; repeated `+=` on a growing string is not
    # guaranteed to be.
    return "".join(line.rstrip("\n") for line in lines)


def extract_questions(html):
    """Return one ``"id;title;time"`` string per question found in *html*.

    Args:
        html: the page markup as a single line (no embedded newlines).

    Returns:
        A list of semicolon-joined records, in page order; empty when
        nothing matches.
    """
    return [";".join(fields) for fields in re.findall(regex, html)]


if __name__ == "__main__":
    for record in extract_questions(join_lines(sys.stdin)):
        print(record)

3 weeks ago+ 0 comments

import re
import sys

# Numeric id sitting between "questions/" and the next "/".
ID_RE = re.compile(r"(?<=questions\/)\d+(?=\/)")
# Link text after hyperlink"> up to the FIRST closing </a>. The quantifier is
# lazy so a second </a> later on the same line is not swallowed.
TITLE_RE = re.compile(r"(?<=hyperlink\"\>).+?(?=\<\/a\>)")
# Text after relativetime"> up to the FIRST "<" (lazy for the same reason).
TIME_RE = re.compile(r"(?<=relativetime\"\>).+?(?=\<)")


def scrape(html):
    """Return one ``"id;title;time"`` string per question found in *html*.

    The three fields are collected independently and paired by position.

    Args:
        html: the page markup as one string (may contain newlines).

    Returns:
        A list of semicolon-joined records, in page order.
    """
    ids = ID_RE.findall(html)
    titles = TITLE_RE.findall(html)
    times = TIME_RE.findall(html)
    # zip stops at the shortest list, so a stray "/questions/<id>/" link
    # without a matching title or time cannot raise IndexError.
    return [";".join(fields) for fields in zip(ids, titles, times)]


if __name__ == "__main__":
    for record in scrape(sys.stdin.read()):
        print(record)

2 months ago+ 0 comments

My solution (Python):

# Read the whole page from STDIN and print "id;title;time" for each question.
import re
import sys

page = sys.stdin.read()

# (id, title) pairs: the id comes from href="/questions/<id>/", the title is
# the text immediately before a closing </a></h3>.
links = re.compile(
    r'<a href="\/questions\/(\d+)\/.+?\>+?([^<]+)(?:<\/a><\/h3>)', re.S
).findall(page)

# Text that follows the first tag after each "asked" marker.
times = re.compile(r'asked.?<(?:[^>])+>([^<]+)<', re.S).findall(page)

# Pair the two result lists by position; zip stops at the shorter one.
for (question_id, title), when in zip(links, times):
    print(";".join((question_id, title, when)))

10 months ago+ 0 comments

JavaScript: // (.*?) --> The lazy quantifier stops at the first following </a> and </span>; otherwise .* is greedy and matches all the content without stopping.

// /gs --> The dotAll flag (s) changes the behavior of the . (dot) metacharacter in the regular expression. By default, . matches any character except the newline character \n; with the s (dotAll) flag added, . matches the newline character \n as well.

// Split the page on the question-summary class marker and scan each chunk
// for one question.
const chunks = input.split('class="question-summary"');
for (const chunk of chunks) {
    // A fresh regex per chunk, so the `g` flag's lastIndex always starts at 0.
    // Captures: id from "question-summary-<id>", link text, relative time.
    const pattern = /.*question-summary-([0-9]+).*?class="question-hyperlink">(.*?)<\/a\>.*class="relativetime">(.*?)<\/span>.*/gs;
    const found = pattern.exec(chunk);
    if (found === null) {
        continue; // chunk holds no complete question (e.g. text before the first marker)
    }
    // Drop element 0 (the full match) and print the captured groups.
    console.log(found.slice(1).join(';'));
}

11 months ago+ 0 comments

Bash

#!/bin/bash 
# Reads the page HTML on stdin and prints "id;title;time" records.
# The id and title are emitted from the question-link line without a trailing
# newline; the later relativetime line appends the time and ends the record.

# Load every input line into an array.
readarray myArray 

for line in "${myArray[@]}"; do

# Only lines mentioning "question" or "relativetime" can contribute output.
if [[ $line =~ question ]] || [[ $line =~ relativetime ]]; then
# POSIX ERE has no \d class (GNU grep -E treats it as a literal 'd'), so the
# numeric id is matched with [0-9]+ instead.
# tr joins the grep -o fragments; sed then strips the markup around them and
# turns the "<" that follows the time into the record's newline.
echo "$line" | grep -E -o '(questions\/[0-9]+\/)|(question-hyperlink">.+<\/a>)|(relativetime">.+<)'| 
tr -d '\n' | 
sed 's/question-hyperlink">/;/; s/<\/a>/;/; s/relativetime">//; s/</\n/; s/questions\///; s/\///'   
fi
done

Sort by

|

108 Discussions

|

Cookie support is required to access HackerRank