Filter Jsoup stream

pull/174/head
M66B 6 years ago
parent 4113ad9a37
commit f375b39df7

@ -24,14 +24,17 @@ import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element; import org.jsoup.nodes.Element;
import java.io.File; import java.io.File;
import java.io.FileInputStream;
import java.io.FilterInputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets; import java.nio.charset.StandardCharsets;
public class JsoupEx { public class JsoupEx {
static Document parse(String html) { static Document parse(String html) {
try {
/* /*
org.jsoup.UncheckedIOException: java.io.IOException: Input is binary and unsupported org.jsoup.UncheckedIOException: java.io.IOException: Input is binary and unsupported
at org.jsoup.parser.CharacterReader.<init>(SourceFile:38) at org.jsoup.parser.CharacterReader.<init>(SourceFile:38)
at org.jsoup.parser.CharacterReader.<init>(SourceFile:43) at org.jsoup.parser.CharacterReader.<init>(SourceFile:43)
at org.jsoup.parser.TreeBuilder.initialiseParse(SourceFile:38) at org.jsoup.parser.TreeBuilder.initialiseParse(SourceFile:38)
@ -40,7 +43,6 @@ org.jsoup.UncheckedIOException: java.io.IOException: Input is binary and unsuppo
at org.jsoup.parser.Parser.parse(SourceFile:107) at org.jsoup.parser.Parser.parse(SourceFile:107)
at org.jsoup.Jsoup.parse(SourceFile:58) at org.jsoup.Jsoup.parse(SourceFile:58)
*/ */
try {
return Jsoup.parse(html.replace("\0", "")); return Jsoup.parse(html.replace("\0", ""));
} catch (OutOfMemoryError ex) { } catch (OutOfMemoryError ex) {
Log.e(ex); Log.e(ex);
@ -53,6 +55,40 @@ org.jsoup.UncheckedIOException: java.io.IOException: Input is binary and unsuppo
} }
static Document parse(File in) throws IOException { static Document parse(File in) throws IOException {
return Jsoup.parse(in, StandardCharsets.UTF_8.name()); try (InputStream is = new FileInputStream(in)) {
return Jsoup.parse(new FilteredStream(is), StandardCharsets.UTF_8.name(), "");
}
}
private static class FilteredStream extends FilterInputStream {
protected FilteredStream(InputStream in) {
super(in);
}
@Override
public int read() throws IOException {
int b = super.read();
while (b == 0)
b = super.read();
return b;
}
@Override
public int read(byte[] buffer) throws IOException {
return read(buffer, 0, buffer.length);
}
@Override
public int read(byte[] buffer, int off, int len) throws IOException {
int b;
int c = 0;
while (c < len) {
b = read();
if (b < 0)
return (c == 0 ? -1 : c);
buffer[off + c++] = (byte) b;
}
return c;
}
} }
} }

Loading…
Cancel
Save