From f375b39df7306c52ed2d3092ea6989e59725b5e8 Mon Sep 17 00:00:00 2001 From: M66B Date: Fri, 21 Feb 2020 08:55:24 +0100 Subject: [PATCH] Filter Jsoup stream --- .../main/java/eu/faircode/email/JsoupEx.java | 58 +++++++++++++++---- 1 file changed, 47 insertions(+), 11 deletions(-) diff --git a/app/src/main/java/eu/faircode/email/JsoupEx.java b/app/src/main/java/eu/faircode/email/JsoupEx.java index c36b9e3e76..e34232bc5a 100644 --- a/app/src/main/java/eu/faircode/email/JsoupEx.java +++ b/app/src/main/java/eu/faircode/email/JsoupEx.java @@ -24,23 +24,25 @@ import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import java.io.File; +import java.io.FileInputStream; +import java.io.FilterInputStream; import java.io.IOException; +import java.io.InputStream; import java.nio.charset.StandardCharsets; public class JsoupEx { static Document parse(String html) { - + try { /* -org.jsoup.UncheckedIOException: java.io.IOException: Input is binary and unsupported - at org.jsoup.parser.CharacterReader.(SourceFile:38) - at org.jsoup.parser.CharacterReader.(SourceFile:43) - at org.jsoup.parser.TreeBuilder.initialiseParse(SourceFile:38) - at org.jsoup.parser.HtmlTreeBuilder.initialiseParse(SourceFile:65) - at org.jsoup.parser.TreeBuilder.parse(SourceFile:46) - at org.jsoup.parser.Parser.parse(SourceFile:107) - at org.jsoup.Jsoup.parse(SourceFile:58) + org.jsoup.UncheckedIOException: java.io.IOException: Input is binary and unsupported + at org.jsoup.parser.CharacterReader.(SourceFile:38) + at org.jsoup.parser.CharacterReader.(SourceFile:43) + at org.jsoup.parser.TreeBuilder.initialiseParse(SourceFile:38) + at org.jsoup.parser.HtmlTreeBuilder.initialiseParse(SourceFile:65) + at org.jsoup.parser.TreeBuilder.parse(SourceFile:46) + at org.jsoup.parser.Parser.parse(SourceFile:107) + at org.jsoup.Jsoup.parse(SourceFile:58) */ - try { return Jsoup.parse(html.replace("\0", "")); } catch (OutOfMemoryError ex) { Log.e(ex); @@ -53,6 +55,40 @@ org.jsoup.UncheckedIOException: java.io.IOException: Input is binary and unsuppo } static Document parse(File in) throws IOException { - return Jsoup.parse(in, StandardCharsets.UTF_8.name()); + try (InputStream is = new FileInputStream(in)) { + return Jsoup.parse(new FilteredStream(is), StandardCharsets.UTF_8.name(), ""); + } + } + + private static class FilteredStream extends FilterInputStream { + protected FilteredStream(InputStream in) { + super(in); + } + + @Override + public int read() throws IOException { + int b = super.read(); + while (b == 0) + b = super.read(); + return b; + } + + @Override + public int read(byte[] buffer) throws IOException { + return read(buffer, 0, buffer.length); + } + + @Override + public int read(byte[] buffer, int off, int len) throws IOException { + int b; + int c = 0; + while (c < len) { + b = read(); + if (b < 0) + return (c == 0 ? -1 : c); + buffer[off + c++] = (byte) b; + } + return c; + } } }