From 8c00260019973250916b533c15602d34bb187342 Mon Sep 17 00:00:00 2001 From: M66B Date: Mon, 31 Oct 2022 12:16:27 +0100 Subject: [PATCH] Allow jsoup text expressions --- FAQ.md | 9 ++++++++- .../main/java/eu/faircode/email/EntityRule.java | 17 +++++++++++++---- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/FAQ.md b/FAQ.md index 8a3c2f570d..1159c61811 100644 --- a/FAQ.md +++ b/FAQ.md @@ -2571,9 +2571,16 @@ Note that email addresses are formatted like this: "Somebody" `` -Note that message texts are normalized, which means that all whitespaces (spaces, tabs, line breaks, etc) are replaced by a single space. +Note that message texts are normalized when not using a regex, which means that every run of whitespace (spaces, tabs, line breaks, etc.) is replaced by a single space. This makes it easier to match texts on multiple lines or when the line break is at different places. +Since version 1.1996 it is possible to use [Jsoup selectors](https://jsoup.org/cookbook/extracting-data/selector-syntax) to match HTML elements, +by prefixing the selector with *jsoup:* and entering it as a *text contains* condition, for example: + +``` +jsoup:html > body > div > a[href=https://example.org] +``` + You can use multiple rules, possibly with a *stop processing*, for an *or* or a *not* condition. Matching is not case sensitive, unless you use [regular expressions](https://en.wikipedia.org/wiki/Regular_expression). 
diff --git a/app/src/main/java/eu/faircode/email/EntityRule.java b/app/src/main/java/eu/faircode/email/EntityRule.java index 9af54708ad..00fe7f4350 100644 --- a/app/src/main/java/eu/faircode/email/EntityRule.java +++ b/app/src/main/java/eu/faircode/email/EntityRule.java @@ -126,6 +126,7 @@ public class EntityRule { static final String EXTRA_SUBJECT = "subject"; static final String EXTRA_RECEIVED = "received"; + private static final String JSOUP_PREFIX = "jsoup:"; private static final long SEND_DELAY = 5000L; // milliseconds private static final ExecutorService executor = Helper.getBackgroundExecutor(1, "rule"); @@ -365,7 +366,9 @@ public class EntityRule { boolean regex = jbody.getBoolean("regex"); boolean skip_quotes = jbody.optBoolean("skip_quotes"); - if (!regex) + boolean jsoup = value.startsWith(JSOUP_PREFIX); + + if (!regex && !jsoup) value = value.replaceAll("\\s+", " "); if (html == null && message.content) { @@ -386,9 +389,15 @@ public class EntityRule { Document d = JsoupEx.parse(html); if (skip_quotes) d.select("blockquote").remove(); - String text = d.body().text(); - if (!matches(context, message, value, text, regex)) - return false; + if (jsoup) { + String selector = value.substring(JSOUP_PREFIX.length()); + if (d.select(selector).size() == 0) + return false; + } else { + String text = d.body().text(); + if (!matches(context, message, value, text, regex)) + return false; + } } // Date