Detect HTML Attributes

  • + 0 comments

    My solution in Python 3:

    # Enter your code here. Read input from STDIN. Print output to STDOUT
    import re
    
    # Opening tag: "<", a tag name that starts with a letter, then everything
    # up to the next ">".  Closing tags ("</a>"), comments ("<!-- ... -->") and
    # doctypes do not start with a letter right after "<", so they never match.
    # Stopping the name at the first non-name character also keeps the "/" of
    # a self-closing "<br/>" out of the tag name.
    TAG_PATTERN = re.compile(r'<([A-Za-z][A-Za-z0-9-]*)([^>]*)>')

    # Attribute name: a whole token (letters, digits, "_", "-", ":" or ".")
    # directly followed by "=" and an opening quote.  The look-behind stops the
    # match from starting in the middle of a longer token, so "data-id" is
    # reported as "data-id" rather than "id".
    ATTRIBUTE_PATTERN = re.compile(r'(?<![\w:.-])[A-Za-z][\w:.-]*(?==["\'])')


    def collect_tag_attributes(html):
        """Map every opening tag found in *html* to the set of its attributes.

        Tags that appear without any attribute are still present in the
        result, mapped to an empty set.  Closing tags, comments and doctype
        declarations are ignored.  Only attributes whose value is quoted
        (single or double quotes) are detected.

        Args:
            html: The markup to scan, as one string.

        Returns:
            A dict of tag name -> set of attribute names, both kept exactly
            as written in the markup (no case folding).
        """
        attributes_by_tag = {}
        for tag, remainder in TAG_PATTERN.findall(html):
            # setdefault registers attribute-less tags too; the set removes
            # duplicates across repeated occurrences of the same tag.
            attributes_by_tag.setdefault(tag, set()).update(
                ATTRIBUTE_PATTERN.findall(remainder)
            )
        return attributes_by_tag


    def format_report(attributes_by_tag):
        """Return the "tag:attr1,attr2" lines, tags and attributes sorted.

        Args:
            attributes_by_tag: Mapping of tag name -> iterable of attribute
                names, as produced by collect_tag_attributes().

        Returns:
            A list of strings, one per tag, in lexicographic tag order.
        """
        return [
            f"{tag}:{','.join(sorted(attributes_by_tag[tag]))}"
            for tag in sorted(attributes_by_tag)
        ]


    def main():
        """Read the line count and the markup from STDIN and print the report."""
        line_count = int(input())
        # Join the lines with spaces so a tag split across two input lines is
        # still seen as a single tag.
        html = " ".join(input() for _ in range(line_count))
        for line in format_report(collect_tag_attributes(html)):
            print(line)


    if __name__ == "__main__":
        main()