Discussion on Detect HTML Tags, Attributes and Attribute Values Challenge

11 months ago+ 0 comments

import re
import sys

# The first input line is an integer (presumably the number of HTML lines that
# follow -- confirm against the challenge statement).  Its value is not used
# below, but the line must be consumed so it does not end up in ``text``.
N = int(input())

# Everything remaining on stdin is the HTML fragment to scan.
text = sys.stdin.read()

# Opening (or self-closing) tag: group 1 is the tag name, group 2 the raw
# attribute text up to the closing ``>`` / ``/>``.  The lookbehind requires the
# name to directly follow ``<``, so closing tags (``</x>``) never match.
regex_endtag = r'(?<=<)([a-zA-Z0-9]+)([\s\S]*?)/?>'
# HTML comment, terminated by the full ``-->`` delimiter so that a ``->``
# inside the comment body does not end the match early.
regex_comment = r'<!--.*?-->'

# Strip comments first so tags inside them are not reported.  DOTALL lets
# ``.`` cross newlines, which is needed for comments spanning several lines.
processed_input = re.sub(regex_comment, '', text, flags=re.DOTALL)

# List of (tag_name, raw_attribute_text) tuples in document order.
header = re.findall(regex_endtag, processed_input)

# Normalise the attribute text: newlines become spaces (so attributes on
# adjacent lines stay separate tokens), double quotes are dropped, and the
# surrounding whitespace is trimmed.
cleaned_list = [
    (key, value.replace('\n', ' ').replace('"', '').strip())
    for key, value in header
]

# Tags without attributes are reduced to a 1-tuple holding just the name.
cleaned_list_final = [
    (key, value) if value else (key,)
    for key, value in cleaned_list
]

def print_formatted_list(items):
    """Print each tag name followed by its attributes, one per line.

    Args:
        items: Iterable of tuples.  ``item[0]`` is the tag name; an optional
            ``item[1]`` is a whitespace-separated string of ``key=value``
            attribute tokens.

    The tag name is printed on its own line, then every attribute as
    ``-> key > value``.
    """
    for item in items:
        print(item[0])
        if len(item) > 1:
            for attribute in item[1].split():
                # Split on the first "=" only, so values that themselves
                # contain "=" (e.g. URL query strings) stay intact.
                key, separator, value = attribute.partition('=')
                if not separator:
                    # Token without "=" (a bare attribute, or a fragment of a
                    # value that contained spaces): no key/value to report.
                    continue
                print(f'-> {key} > {value}')

# Emit the tag/attribute report for every tag collected in cleaned_list_final.
print_formatted_list(cleaned_list_final)

Cookie support is required to access HackerRank