Discussion on Detect HTML links Challenge

3 weeks ago+ 0 comments

For Python 3:

import re
# Matches one anchor element that carries a quoted href attribute.
#   group 1: the quote character that opens the href value (' or ")
#   group 2: the href value
#   group 3: everything between the opening tag and the nearest </a>
# IGNORECASE accepts <A HREF=...>; DOTALL lets "." cross newlines when the
# subject string contains them.
A_TAG = re.compile(
    r'<a\s+'                 # tag name followed by at least one whitespace
    r'(?:[^>]*?\s)?'         # optional earlier attributes, ending in whitespace
    r'href\s*=\s*'           # href must begin an attribute (data-href is skipped)
    r'([\'"])(.*?)\1'        # quoted value, closed by the same quote character
    r'[^>]*>'                # remainder of the opening tag
    r'(.*?)'                 # link content, as short as possible
    r'</a>',                 # closing tag
    flags=re.IGNORECASE | re.DOTALL
)

# Any markup tag: "<", one or more characters other than ">", then ">".
TAG_STRIP = re.compile(r'<[^>]+>')
# A run of one or more whitespace characters.
WS = re.compile(r'\s+')


def clean_text(raw):
    """Return *raw* as plain, single-spaced text.

    Every substring matched by TAG_STRIP is deleted, each whitespace run
    is replaced by one space, and leading/trailing whitespace is removed.
    """
    without_tags = TAG_STRIP.sub('', raw)
    single_spaced = WS.sub(' ', without_tags)
    return single_spaced.strip()

# Driver: the first input line gives the number of HTML lines that follow.
# Each of those lines is scanned on its own, and every anchor found is
# reported as "<href>,<cleaned link text>", one link per output line, in the
# order encountered.
N = int(input())
out_lines = []
for _ in range(N):  # the counter itself is not needed
    html_line = input()
    # findall yields (quote, url, inner_html) tuples; the quote character
    # only serves the pattern's backreference, so it is discarded here.
    out_lines.extend(
        f"{url},{clean_text(inner)}"
        for _quote, url, inner in A_TAG.findall(html_line)
    )
print("\n".join(out_lines))

Cookie support is required to access HackerRank