Attribute Parser

  • + 0 comments
    #include <cctype>
    #include <iostream>
    #include <map>
    #include <string>
    #include <vector>
    
    using namespace std;
    
    // Concatenates tokens into one string, placing delimiter between neighbouring
    // elements.
    string join(const vector<string>& tokens, const char& delimiter);
    // Returns the index of the first non-whitespace character of s at or after
    // start, or s.length() when there is none.
    int find_non_whitespace(const string& s, const int& start);
    // Returns the index just past the nearest non-whitespace character found when
    // scanning backwards from start - 1, or 0 when the scan finds none.
    int prev_whitespace(const string& s, const int& start);
    
    // Reads an HRML document and a list of queries from standard input and
    // prints, for each query, the value of the addressed attribute or
    // "Not Found!".
    //
    // Input layout: a first line holding the number of HRML lines (n) and the
    // number of queries (q), followed by n HRML lines and then q query lines.
    // Every attribute is stored under the key "<open tag names joined by '.'>~<name>",
    // which has the same shape as a query, so each query is one map lookup.
    int main() {
        int n = 0, q = 0;
        if (!(cin >> n >> q)) return 0;  // no counts: nothing to parse or answer

        string input;
        // Discard the remainder of the count line (trailing blanks, '\r', '\n')
        // so the first HRML line is not read as an empty string.
        getline(cin, input);

        // Stop early on end of input instead of re-pushing a stale line.
        vector<string> hrml, queries;
        for (int i = 0; i < n && getline(cin, input); ++i) hrml.push_back(input);
        for (int i = 0; i < q && getline(cin, input); ++i) queries.push_back(input);

        const string::size_type npos = string::npos;

        // Collect the text found between each '<' and the next '>'.
        vector<string> tag_strings;
        for (const string& line : hrml) {
            string::size_type open = line.find('<');
            while (open != npos) {
                const string::size_type close = line.find('>', open);
                if (close == npos) break;
                tag_strings.push_back(line.substr(open + 1, close - open - 1));
                open = line.find('<', close);
            }
        }

        const char* const blanks = " \t\r\n\v\f";
        map<string, string> values;
        vector<string> tags;  // names of the currently open tags, outermost first
        for (const string& t_str : tag_strings) {
            if (!t_str.empty() && t_str[0] == '/') {
                // Closing tag: leave the innermost open tag. The emptiness check
                // keeps an unmatched closing tag from popping an empty vector.
                if (!tags.empty()) tags.pop_back();
                continue;
            }

            const string::size_type name_begin = find_non_whitespace(t_str, 0);
            const string::size_type name_end =
                t_str.find_first_of(blanks, name_begin);
            // substr takes (position, count), so the count is the name length,
            // not the end index.
            tags.push_back(name_end == npos
                               ? t_str.substr(name_begin)
                               : t_str.substr(name_begin, name_end - name_begin));
            if (name_end == npos) continue;  // bare tag: no attributes follow

            // The tag path is the same for every attribute of this tag.
            const string prefix = join(tags, '.') + "~";
            string::size_type cursor =
                find_non_whitespace(t_str, static_cast<int>(name_end));
            while (true) {
                const string::size_type eq = t_str.find('=', cursor);
                if (eq == npos) break;
                const string::size_type quote_open = t_str.find('"', eq);
                if (quote_open == npos) break;
                // Search from the character right after the opening quote so an
                // empty value ("") is closed by its own quote.
                const string::size_type quote_close =
                    t_str.find('"', quote_open + 1);
                // An unterminated value ends parsing of this tag; continuing
                // would restart the scan and never finish.
                if (quote_close == npos) break;

                // The attribute name spans [cursor, attr_end) with the blanks
                // before '=' trimmed off.
                const string::size_type attr_end =
                    prev_whitespace(t_str, static_cast<int>(eq));
                if (attr_end > cursor) {
                    values[prefix + t_str.substr(cursor, attr_end - cursor)] =
                        t_str.substr(quote_open + 1, quote_close - quote_open - 1);
                }
                cursor = find_non_whitespace(
                    t_str, static_cast<int>(quote_close + 1));
            }
        }

        for (const string& query : queries) {
            const map<string, string>::const_iterator hit = values.find(query);
            if (hit != values.end()) cout << hit->second << '\n';
            else cout << "Not Found!" << '\n';
        }

        return 0;
    }
    
    // Builds a single string from tokens, inserting delimiter between each pair
    // of adjacent elements. An empty vector yields an empty string and a single
    // element is returned unchanged.
    string join(const vector<string>& tokens, const char& delimiter) {
        string result;
        vector<string>::const_iterator it = tokens.begin();
        if (it == tokens.end()) return result;

        // Seed with the first element, then prefix every later one with the
        // delimiter so no trailing delimiter is ever produced.
        result = *it;
        for (++it; it != tokens.end(); ++it) {
            result += delimiter;
            result += *it;
        }
        return result;
    }
    
    // Returns the index of the first non-whitespace character of s at or after
    // start, or s.length() when every remaining character is whitespace.
    //
    // A negative start (for example string::npos narrowed to int, i.e. -1) also
    // yields s.length(): there is nothing left to scan in that case.
    int find_non_whitespace(const string& s, const int& start) {
      const int length = static_cast<int>(s.length());
      if (start < 0) return length;
      for (int i = start; i < length; ++i) {
        // isspace is only defined for values representable as unsigned char (or
        // EOF); the cast keeps bytes >= 0x80 from being passed as negatives.
        if (!isspace(static_cast<unsigned char>(s[i]))) return i;
      }
      return length;
    }
    
    // Returns the index just past the last non-whitespace character of s that
    // lies before position start, or 0 when s[0, start) holds only whitespace.
    // Put differently, it is the exclusive end of the text preceding start once
    // trailing blanks are trimmed.
    //
    // start is clamped to s.length(), so an out-of-range position is never read.
    int prev_whitespace(const string& s, const int& start) {
        const int length = static_cast<int>(s.length());
        const int limit = start < length ? start : length;
        // Scan down to and including index 0 so that text starting at the very
        // first character is still found.
        for (int i = limit - 1; i >= 0; --i) {
            // isspace requires a value representable as unsigned char.
            if (!isspace(static_cast<unsigned char>(s[i]))) return i + 1;
        }
        return 0;
    }