Allow jsoup text expressions

pull/209/head
M66B 2 years ago
parent e65d7b9c75
commit 8c00260019

@ -2571,9 +2571,16 @@ Note that email addresses are formatted like this:
"Somebody" <somebody@example.org> "Somebody" <somebody@example.org>
`` ``
Note that message texts are normalized, which means that all whitespaces (spaces, tabs, line breaks, etc) are replaced by a single space. Note that message texts are normalized when not using a regex, which means that all whitespaces (spaces, tabs, line breaks, etc) are replaced by a single space.
This makes it easier to match texts on multiple lines or when the line break is at different places. This makes it easier to match texts on multiple lines or when the line break is at different places.
Since version 1.1996 it is possible to use [Jsoup selectors](https://jsoup.org/cookbook/extracting-data/selector-syntax) to match HTML elements,
by prefixing the selector with *jsoup:* and entering it as a *text contains* condition, for example:
```
jsoup:html > body > div > a[href=https://example.org]
```
You can use multiple rules, possibly with a *stop processing*, for an *or* or a *not* condition. You can use multiple rules, possibly with a *stop processing*, for an *or* or a *not* condition.
Matching is not case sensitive, unless you use [regular expressions](https://en.wikipedia.org/wiki/Regular_expression). Matching is not case sensitive, unless you use [regular expressions](https://en.wikipedia.org/wiki/Regular_expression).

@ -126,6 +126,7 @@ public class EntityRule {
static final String EXTRA_SUBJECT = "subject"; static final String EXTRA_SUBJECT = "subject";
static final String EXTRA_RECEIVED = "received"; static final String EXTRA_RECEIVED = "received";
private static final String JSOUP_PREFIX = "jsoup:";
private static final long SEND_DELAY = 5000L; // milliseconds private static final long SEND_DELAY = 5000L; // milliseconds
private static final ExecutorService executor = Helper.getBackgroundExecutor(1, "rule"); private static final ExecutorService executor = Helper.getBackgroundExecutor(1, "rule");
@ -365,7 +366,9 @@ public class EntityRule {
boolean regex = jbody.getBoolean("regex"); boolean regex = jbody.getBoolean("regex");
boolean skip_quotes = jbody.optBoolean("skip_quotes"); boolean skip_quotes = jbody.optBoolean("skip_quotes");
if (!regex) boolean jsoup = value.startsWith(JSOUP_PREFIX);
if (!regex && !jsoup)
value = value.replaceAll("\\s+", " "); value = value.replaceAll("\\s+", " ");
if (html == null && message.content) { if (html == null && message.content) {
@ -386,9 +389,15 @@ public class EntityRule {
Document d = JsoupEx.parse(html); Document d = JsoupEx.parse(html);
if (skip_quotes) if (skip_quotes)
d.select("blockquote").remove(); d.select("blockquote").remove();
String text = d.body().text(); if (jsoup) {
if (!matches(context, message, value, text, regex)) String selector = value.substring(JSOUP_PREFIX.length());
return false; if (d.select(selector).size() == 0)
return false;
} else {
String text = d.body().text();
if (!matches(context, message, value, text, regex))
return false;
}
} }
// Date // Date

Loading…
Cancel
Save