Skip to content

1044. Longest Duplicate Substring 👍

  • Time: $O(n\log n)$
  • Space: $O(n)$
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
class Solution {
 public:
  string longestDupSubstring(string s) {
    constexpr int kMod = 1e9 + 7;
    const int n = s.length();
    vector<int> pows(n, 1);
    int bestStart = -1;
    int l = 1;
    int r = n;

    for (int i = 1; i < n; ++i)
      pows[i] = (pows[i - 1] * 26L) % kMod;

    while (l < r) {
      const int m = (l + r) / 2;
      const int start = getStart(s, m, pows, kMod);
      if (start == -1) {
        r = m;
      } else {
        bestStart = start;
        l = m + 1;
      }
    }

    if (bestStart == -1)
      return "";
    if (getStart(s, l, pows, kMod) == -1)
      return s.substr(bestStart, l - 1);
    return s.substr(bestStart, l);
  }

 private:
  // k := length of hashed substring
  int getStart(const string& s, int k, const vector<int>& pows,
               const int& kMod) {
    unordered_map<int, vector<int>> hashedToStarts;
    long long h = 0;

    // compute hash value of s[:k]
    for (int i = 0; i < k; ++i)
      h = ((h * 26) % kMod + val(s[i])) % kMod;
    hashedToStarts[h].push_back(0);

    // compute rolling hash by Rabin Karp
    for (int i = k; i < s.length(); ++i) {
      const int startIndex = i - k + 1;
      h = ((h - static_cast<long long>(pows[k - 1]) * val(s[i - k])) % kMod +
           kMod) %
          kMod;
      h = (h * 26 + val(s[i])) % kMod;
      if (hashedToStarts.count(h)) {
        const auto& currSub = s.substr(startIndex, k);
        for (const int start : hashedToStarts[h])
          if (s.substr(start, k) == currSub)
            return startIndex;
      }
      hashedToStarts[h].push_back(startIndex);
    }

    return -1;
  }

  int val(char c) {
    return c - 'a';
  }
};
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
class Solution {
  public String longestDupSubstring(String s) {
    final int kMod = (int) 1e9 + 7;
    final int n = s.length();
    int[] pows = new int[n];
    int bestStart = -1;
    int l = 1;
    int r = n;

    pows[0] = 1;
    for (int i = 1; i < n; ++i)
      pows[i] = (int) ((pows[i - 1] * 26L) % (long) kMod);

    while (l < r) {
      final int m = (l + r) / 2;
      final int start = getStart(s, m, pows, kMod);
      if (start == -1) {
        r = m;
      } else {
        bestStart = start;
        l = m + 1;
      }
    }

    if (bestStart == -1)
      return "";
    if (getStart(s, l, pows, kMod) == -1)
      return s.substring(bestStart, bestStart + l - 1);
    return s.substring(bestStart, bestStart + l);
  }

  // k := length of hashed substring
  private int getStart(final String s, int k, int[] pows, int kMod) {
    Map<Long, List<Integer>> hashedToStarts = new HashMap<>();
    long h = 0;

    // compute hash value of s[:k]
    for (int i = 0; i < k; ++i)
      h = ((h * 26) % kMod + val(s.charAt(i))) % kMod;
    hashedToStarts.put(h, new ArrayList<>());
    hashedToStarts.get(h).add(0);

    // compute rolling hash by Rabin Karp
    for (int i = k; i < s.length(); ++i) {
      final int startIndex = i - k + 1;
      h = ((h - (long) (pows[k - 1]) * val(s.charAt(i - k))) % kMod + kMod) % kMod;
      h = (h * 26 + val(s.charAt(i))) % kMod;
      if (hashedToStarts.containsKey(h)) {
        final String currSub = s.substring(startIndex, startIndex + k);
        for (final int start : hashedToStarts.get(h))
          if (s.substring(start, start + k).equals(currSub))
            return startIndex;
      }
      hashedToStarts.put(h, new ArrayList<>());
      hashedToStarts.get(h).add(startIndex);
    }

    return -1;
  }

  private int val(char c) {
    return c - 'a';
  }
}
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
class Solution:
  def longestDupSubstring(self, s: str) -> str:
    kMod = int(1e9) + 7
    bestStart = -1
    l = 1
    r = len(s)

    def val(c: str) -> int:
      return ord(c) - ord('a')

    # k := length of hashed substring
    def getStart(k: int) -> Optional[int]:
      maxPow = pow(26, k - 1, kMod)
      hashedToStart = defaultdict(list)
      h = 0

      # compute hash value of s[:k]
      for i in range(k):
        h = (h * 26 + val(s[i])) % kMod
      hashedToStart[h].append(0)

      # compute rolling hash by Rabin Karp
      for i in range(k, len(s)):
        startIndex = i - k + 1
        h = (h - maxPow * val(s[i - k])) % kMod
        h = (h * 26 + val(s[i])) % kMod
        if h in hashedToStart:
          currSub = s[startIndex:startIndex + k]
          for start in hashedToStart[h]:
            if s[start:start + k] == currSub:
              return startIndex
        hashedToStart[h].append(startIndex)

    while l < r:
      m = (l + r) // 2
      start: Optional[int] = getStart(m)
      if start:
        bestStart = start
        l = m + 1
      else:
        r = m

    if bestStart == -1:
      return ''
    if getStart(l):
      return s[bestStart:bestStart + l]
    return s[bestStart:bestStart + l - 1]
Back to top