mirror of https://github.com/M66B/FairEmail.git
parent
9276018795
commit
b03f0ebe6b
@ -0,0 +1,87 @@
|
||||
package com.twitter.elephantbird.util;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
 * An efficient stream searching class based on the Knuth-Morris-Pratt (KMP) algorithm.
 * <p>
 * The pattern is pre-processed once into a "border" table, which lets {@link #search(InputStream)}
 * scan a stream strictly forward, one byte at a time, without ever re-reading input.
 * <p>
 * For more on how the algorithm works, see:
 * https://www.inf.fh-flensburg.de/lang/algorithmen/pattern/kmpen.htm.
 */
public class StreamSearcher {

  /** Private copy of the bytes being searched for. */
  protected byte[] pattern_;

  /**
   * KMP border table with {@code pattern_.length + 1} entries. Entry {@code k} is the fallback
   * pattern index to retry from when a mismatch occurs after {@code k} matched bytes;
   * entry 0 is the sentinel -1.
   */
  protected int[] borders_;

  // An upper bound on pattern length for searching. Results are undefined for longer patterns.
  // Note: this limit is advisory only; nothing in this class checks it.
  public static final int MAX_PATTERN_LENGTH = 1024;

  /**
   * Creates a searcher for the given pattern.
   *
   * @param pattern
   *          the bytes to look for; copied, so later changes to the array have no effect
   * @throws NullPointerException if {@code pattern} is null
   */
  public StreamSearcher(byte[] pattern) {
    setPattern(pattern);
  }

  /**
   * Sets a new pattern for this StreamSearcher to use.
   *
   * @param pattern
   *          the pattern the StreamSearcher will look for in future calls to search(...);
   *          the array is copied before the border table is rebuilt
   * @throws NullPointerException if {@code pattern} is null
   */
  public void setPattern(byte[] pattern) {
    if (pattern == null) {
      throw new NullPointerException("pattern must not be null");
    }
    pattern_ = Arrays.copyOf(pattern, pattern.length);
    borders_ = new int[pattern_.length + 1];
    preProcess();
  }

  /**
   * Searches for the next occurrence of the pattern in the stream, starting from the current
   * stream position. Note that the position of the stream is changed. If a match is found, the
   * stream points to the end of the match -- i.e. the byte AFTER the pattern. Else, the stream is
   * entirely consumed. The latter is because InputStream semantics make it difficult to have
   * another reasonable default, i.e. leave the stream unchanged.
   * <p>
   * An empty pattern matches trivially at the current position: 0 is returned and nothing is
   * read from the stream.
   *
   * @param stream
   *          the stream to read from
   * @return the number of bytes consumed (up to and including the last byte of the match) if
   *         found, -1 otherwise.
   * @throws IOException if reading from the stream fails
   */
  public long search(InputStream stream) throws IOException {
    // Guard: with an empty pattern the matching loop below would index pattern_[0]
    // and fail with ArrayIndexOutOfBoundsException on the first byte read.
    if (pattern_.length == 0) {
      return 0;
    }

    long bytesRead = 0;

    int b;
    int j = 0; // number of pattern bytes currently matched

    while ((b = stream.read()) != -1) {
      bytesRead++;

      // On mismatch, fall back along the border table until the byte fits or j hits -1.
      while (j >= 0 && (byte) b != pattern_[j]) {
        j = borders_[j];
      }
      // Move to the next character in the pattern (also turns the -1 sentinel back into 0).
      ++j;

      // If we've matched up to the full pattern length, we found it. Return,
      // which will automatically save our position in the InputStream at the point immediately
      // following the pattern match.
      if (j == pattern_.length) {
        return bytesRead;
      }
    }

    // No dice. Note that the stream is now completely consumed.
    return -1;
  }

  /**
   * Builds up a table of longest "borders" for each prefix of the pattern to find. This table is
   * stored internally and aids in implementation of the Knuth-Morris-Pratt string search.
   * <p>
   * For more information, see: https://www.inf.fh-flensburg.de/lang/algorithmen/pattern/kmpen.htm.
   */
  protected void preProcess() {
    int i = 0;
    int j = -1;
    borders_[i] = j; // sentinel: there is no border for the empty prefix
    while (i < pattern_.length) {
      // Shrink the candidate border until it can be extended by pattern_[i].
      while (j >= 0 && pattern_[i] != pattern_[j]) {
        j = borders_[j];
      }
      borders_[++i] = ++j;
    }
  }
}
|
Loading…
Reference in new issue