Skip to content

1410. HTML Entity Parser 👎

  • Time: $O(|\texttt{text}|)$
  • Space: $O(|\texttt{text}|)$
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
class Solution {
 public:
  // Returns `text` with each supported HTML entity (&quot; &apos; &amp; &gt;
  // &lt; &frasl;) replaced by the character it encodes. Any other
  // "&...;" sequence is copied through unchanged.
  //
  // The input is scanned once, left to right. Decoded output is never
  // re-scanned, so "&amp;gt;" yields "&gt;" rather than ">".
  string entityParser(string text) {
    const unordered_map<string, char> entityToChar{
        {"&quot;", '"'}, {"&apos;", '\''}, {"&amp;", '&'},
        {"&gt;", '>'},   {"&lt;", '<'},    {"&frasl;", '/'}};
    // Keep the length as an int so the loop does not mix signed and unsigned.
    const int n = static_cast<int>(text.length());
    string ans;
    int j = 0;                // text[j..] has not been copied into `ans` yet.
    int ampersandIndex = -1;  // Index of the most recent '&' seen, or -1.

    for (int i = 0; i < n; ++i) {
      if (text[i] == '&') {
        // A later '&' supersedes an earlier one: the candidate entity always
        // starts at the last '&' before the ';'.
        ampersandIndex = i;
      } else if (text[i] == ';' && ampersandIndex >= j) {
        // `ampersandIndex >= j` ensures the '&' belongs to text that has not
        // already been consumed by a previous candidate.
        ans.append(text, j, ampersandIndex - j);  // Plain text before the '&'.
        ans += getCharIfMatched(
            text.substr(ampersandIndex, i - ampersandIndex + 1), entityToChar);
        j = i + 1;
      }
    }

    // Copy whatever follows the last processed ';'.
    ans.append(text, j, string::npos);
    return ans;
  }

 private:
  // Returns the decoded character (as a one-character string) when `sub` is a
  // key of `entityToChar`; otherwise returns `sub` itself.
  string getCharIfMatched(const string& sub,
                          const unordered_map<string, char>& entityToChar) {
    // Hash lookup instead of iterating over every entry of the map.
    const auto it = entityToChar.find(sub);
    return it == entityToChar.cend() ? sub : string(1, it->second);
  }
};
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
class Solution {
  /**
   * Replaces the supported HTML entities in {@code text} with the characters they encode.
   *
   * <p>Supported entities: {@code &quot; &apos; &amp; &gt; &lt; &frasl;}. Any other
   * {@code &...;} sequence is left untouched.
   *
   * @param text the input that may contain HTML entities
   * @return {@code text} with every supported entity decoded
   */
  public String entityParser(String text) {
    // Every entity except "&amp;". None of these replacements can produce another
    // entity, so the (unspecified) iteration order of Map.of does not matter.
    final Map<String, String> entityToChar =
        Map.of("&quot;", "\"", "&apos;", "'", "&gt;", ">", "&lt;", "<", "&frasl;", "/");

    for (final Map.Entry<String, String> entry : entityToChar.entrySet()) {
      // String.replace substitutes literally. replaceAll would treat the entity as a
      // regex and give '$' and '\' special meaning in the replacement.
      text = text.replace(entry.getKey(), entry.getValue());
    }

    // Decode "&amp;" last so that the '&' it produces is never combined with the
    // following characters into a new entity (e.g. "&amp;gt;" must become "&gt;").
    return text.replace("&amp;", "&");
  }
}
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
class Solution:
  def entityParser(self, text: str) -> str:
    """Return `text` with the supported HTML entities decoded.

    Each entity is substituted in a separate pass over the string. '&amp;' is
    handled in the final pass so that the '&' it yields cannot be merged with
    the following characters into another entity (e.g. '&amp;gt;' -> '&gt;').
    """
    # Ordered (entity, character) pairs; '&amp;' must stay last.
    replacements = (
        ('&quot;', '"'),
        ('&apos;', '\''),
        ('&gt;', '>'),
        ('&lt;', '<'),
        ('&frasl;', '/'),
        ('&amp;', '&'),
    )

    decoded = text
    for entity, char in replacements:
      decoded = decoded.replace(entity, char)
    return decoded