Skip to content

1044. Longest Duplicate Substring 👍

  • Time: $O(n\log n)$
  • Space: $O(n)$
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
class Solution {
 public:
  string longestDupSubstring(string s) {
    const int n = s.length();
    vector<int> pows(n, 1);
    int bestStart = -1;
    int l = 1;
    int r = n;

    for (int i = 1; i < n; ++i)
      pows[i] = pows[i - 1] * kBase % kHash;

    while (l < r) {
      const int m = (l + r) / 2;
      const int start = getStart(s, m, pows);
      if (start == -1) {
        r = m;
      } else {
        bestStart = start;
        l = m + 1;
      }
    }

    if (bestStart == -1)
      return "";
    if (getStart(s, l, pows) == -1)
      return s.substr(bestStart, l - 1);
    return s.substr(bestStart, l);
  }

 private:
  static constexpr long kBase = 26;
  static constexpr int kHash = 1'000'000'007;

  static constexpr int val(char c) {
    return c - 'a';
  }

  // k := the length of the substring to be hashed
  int getStart(const string& s, int k, const vector<int>& pows) {
    unordered_map<int, vector<int>> hashToStarts;
    long h = 0;

    // Compute the hash value of s[:k].
    for (int i = 0; i < k; ++i)
      h = (h * kBase % kHash + val(s[i])) % kHash;
    hashToStarts[h].push_back(0);

    // Compute the rolling hash by Rabin Karp.
    for (int i = k; i < s.length(); ++i) {
      const int startIndex = i - k + 1;
      h = ((h - static_cast<long>(pows[k - 1]) * val(s[i - k])) % kHash +
           kHash) %
          kHash;
      h = (h * kBase + val(s[i])) % kHash;
      if (const auto it = hashToStarts.find(h); it != hashToStarts.cend()) {
        const string currSub = s.substr(startIndex, k);
        for (const int start : it->second)
          if (s.substr(start, k) == currSub)
            return startIndex;
      }
      hashToStarts[h].push_back(startIndex);
    }

    return -1;
  }
};
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
class Solution {
  public String longestDupSubstring(String s) {
    final int n = s.length();
    int[] pows = new int[n];
    int bestStart = -1;
    int l = 1;
    int r = n;

    pows[0] = 1;
    for (int i = 1; i < n; ++i)
      pows[i] = (int) (pows[i - 1] * kBase % kHash);

    while (l < r) {
      final int m = (l + r) / 2;
      final int start = getStart(s, m, pows);
      if (start == -1) {
        r = m;
      } else {
        bestStart = start;
        l = m + 1;
      }
    }

    if (bestStart == -1)
      return "";
    if (getStart(s, l, pows) == -1)
      return s.substring(bestStart, bestStart + l - 1);
    return s.substring(bestStart, bestStart + l);
  }

  private static final long kBase = 26;
  private static final long kHash = 1_000_000_007;

  private static int val(char c) {
    return c - 'a';
  }

  // k := the length of the substring to be hashed
  private int getStart(final String s, int k, int[] pows) {
    Map<Long, List<Integer>> hashToStarts = new HashMap<>();
    long h = 0;

    // Compute the hash value of s[:k].
    for (int i = 0; i < k; ++i)
      h = (h * kBase % kHash + val(s.charAt(i))) % kHash;
    hashToStarts.put(h, new ArrayList<>());
    hashToStarts.get(h).add(0);

    // Compute the rolling hash by Rabin Karp.
    for (int i = k; i < s.length(); ++i) {
      final int startIndex = i - k + 1;
      h = ((h - (long) (pows[k - 1]) * val(s.charAt(i - k))) % kHash + kHash) % kHash;
      h = (h * kBase + val(s.charAt(i))) % kHash;
      if (hashToStarts.containsKey(h)) {
        final String currSub = s.substring(startIndex, startIndex + k);
        for (final int start : hashToStarts.get(h))
          if (s.substring(start, start + k).equals(currSub))
            return startIndex;
      }
      hashToStarts.put(h, new ArrayList<>());
      hashToStarts.get(h).add(startIndex);
    }

    return -1;
  }
}
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
class Solution:
  def longestDupSubstring(self, s: str) -> str:
    kBase = 26
    kHash = 1_000_000_007
    bestStart = -1
    l = 1
    r = len(s)

    def val(c: str) -> int:
      return string.ascii_lowercase.index(c)

    # k := the length of the substring to be hashed
    def getStart(k: int) -> int | None:
      maxPow = pow(kBase, k - 1, kHash)
      hashToStart = collections.defaultdict(list)
      h = 0

      # Compute the hash value of s[:k].
      for i in range(k):
        h = (h * kBase + val(s[i])) % kHash
      hashToStart[h].append(0)

      # Compute the rolling hash by Rabin Karp.
      for i in range(k, len(s)):
        startIndex = i - k + 1
        h = (h - maxPow * val(s[i - k])) % kHash
        h = (h * kBase + val(s[i])) % kHash
        if h in hashToStart:
          currSub = s[startIndex:startIndex + k]
          for start in hashToStart[h]:
            if s[start:start + k] == currSub:
              return startIndex
        hashToStart[h].append(startIndex)

    while l < r:
      m = (l + r) // 2
      start: int | None = getStart(m)
      if start:
        bestStart = start
        l = m + 1
      else:
        r = m

    if bestStart == -1:
      return ''
    if getStart(l):
      return s[bestStart:bestStart + l]
    return s[bestStart:bestStart + l - 1]