Detect the Domain Name

  • + 0 comments
    import re
    
    # Host part of an http(s) URL: two or more dot-separated labels made of
    # letters, digits and hyphens.  Matching stops at the first character that
    # cannot be part of a host name, so ports, paths, queries and closing
    # quotes never leak into the captured domain.  re.ASCII keeps IGNORECASE
    # from matching non-ASCII look-alikes of a-z.
    _URL_HOST_RE = re.compile(
        r"https?://([a-z0-9-]+(?:\.[a-z0-9-]+)+)",
        re.IGNORECASE | re.ASCII,
    )

    # Leading "www.", "www<digits>." or "ww<digits>." (e.g. "ww2.").  The
    # lookahead only lets the prefix be removed when a dotted name remains,
    # so a bare "www.com" is kept intact.
    _WWW_PREFIX_RE = re.compile(r"^(?:www\d*|ww\d+)\.(?=[^.]+\.)")


    def extract_domains(html: str) -> list:
        """Return the sorted, de-duplicated domain names linked from *html*.

        Every ``http://`` or ``https://`` URL in the text contributes its host
        name.  Hosts are lower-cased (domain names are case-insensitive) and a
        leading ``www.`` / ``ww2.``-style prefix is removed.  Hosts whose last
        label is not purely alphabetic (for example IPv4 addresses) and hosts
        without a dot (for example ``localhost``) are ignored.

        Args:
            html: Arbitrary text or HTML markup to scan.

        Returns:
            A list of unique domain names in ascending lexicographic order.
        """
        domains = set()
        for host in _URL_HOST_RE.findall(html):
            host = host.lower()
            # A real top-level domain is alphabetic; this drops IP addresses
            # and fragments such as "example.com2".
            if not host.rsplit(".", 1)[1].isalpha():
                continue
            domains.add(_WWW_PREFIX_RE.sub("", host))
        return sorted(domains)


    def main() -> None:
        """Read a line count and that many lines from stdin; print the domains.

        The output is the unique domains joined with ``;`` on a single line.
        """
        line_count = int(input())
        lines = []
        for _ in range(line_count):
            try:
                lines.append(input())
            except EOFError:
                # Tolerate input that is shorter than the announced count.
                break
        print(";".join(extract_domains("\n".join(lines))))


    if __name__ == "__main__":
        main()