From 8f9c1be97466a94f6a7c0d8b6653e5267978a679 Mon Sep 17 00:00:00 2001 From: Project Nayuki Date: Wed, 28 Jul 2021 19:55:24 +0000 Subject: [PATCH] Added new public functions QrSegment.{isNumeric(),isAlphanumeric()} and privatized the regexes in {Java, TypeScript, Python}, because the regexes are awkward compared to a clean abstraction. --- .../java/io/nayuki/qrcodegen/QrSegment.java | 50 +++++++++++++------ python/qrcodegen.py | 38 ++++++++------ typescript-javascript/qrcodegen.ts | 36 ++++++++----- 3 files changed, 82 insertions(+), 42 deletions(-) diff --git a/java/src/main/java/io/nayuki/qrcodegen/QrSegment.java b/java/src/main/java/io/nayuki/qrcodegen/QrSegment.java index f416200..73ccbfa 100644 --- a/java/src/main/java/io/nayuki/qrcodegen/QrSegment.java +++ b/java/src/main/java/io/nayuki/qrcodegen/QrSegment.java @@ -74,7 +74,7 @@ public final class QrSegment { */ public static QrSegment makeNumeric(String digits) { Objects.requireNonNull(digits); - if (!NUMERIC_REGEX.matcher(digits).matches()) + if (!isNumeric(digits)) throw new IllegalArgumentException("String contains non-numeric characters"); BitBuffer bb = new BitBuffer(); @@ -98,7 +98,7 @@ public final class QrSegment { */ public static QrSegment makeAlphanumeric(String text) { Objects.requireNonNull(text); - if (!ALPHANUMERIC_REGEX.matcher(text).matches()) + if (!isAlphanumeric(text)) throw new IllegalArgumentException("String contains unencodable characters in alphanumeric mode"); BitBuffer bb = new BitBuffer(); @@ -127,9 +127,9 @@ public final class QrSegment { // Select the most efficient segment encoding automatically List result = new ArrayList<>(); if (text.equals("")); // Leave result empty - else if (NUMERIC_REGEX.matcher(text).matches()) + else if (isNumeric(text)) result.add(makeNumeric(text)); - else if (ALPHANUMERIC_REGEX.matcher(text).matches()) + else if (isAlphanumeric(text)) result.add(makeAlphanumeric(text)); else result.add(makeBytes(text.getBytes(StandardCharsets.UTF_8))); @@ -162,6 +162,33 @@ public final class QrSegment { } + /** + * Tests whether the specified string can be encoded as a segment in numeric mode. + * A string is encodable iff each character is in the range 0 to 9. + * @param text the string to test for encodability (not {@code null}) + * @return {@code true} iff each character is in the range 0 to 9. + * @throws NullPointerException if the string is {@code null} + * @see #makeNumeric(String) + */ + public static boolean isNumeric(String text) { + return NUMERIC_REGEX.matcher(text).matches(); + } + + + /** + * Tests whether the specified string can be encoded as a segment in alphanumeric mode. + * A string is encodable iff each character is in the following set: 0 to 9, A to Z + * (uppercase only), space, dollar, percent, asterisk, plus, hyphen, period, slash, colon. + * @param text the string to test for encodability (not {@code null}) + * @return {@code true} iff each character is in the alphanumeric mode character set + * @throws NullPointerException if the string is {@code null} + * @see #makeAlphanumeric(String) + */ + public static boolean isAlphanumeric(String text) { + return ALPHANUMERIC_REGEX.matcher(text).matches(); + } + + /*---- Instance fields ----*/ @@ -231,18 +258,11 @@ public final class QrSegment { /*---- Constants ----*/ - /** Describes precisely all strings that are encodable in numeric mode. To test whether a - * string {@code s} is encodable: {@code boolean ok = NUMERIC_REGEX.matcher(s).matches();}. - * A string is encodable iff each character is in the range 0 to 9. - * @see #makeNumeric(String) */ - public static final Pattern NUMERIC_REGEX = Pattern.compile("[0-9]*"); + // Describes precisely all strings that are encodable in numeric mode. + private static final Pattern NUMERIC_REGEX = Pattern.compile("[0-9]*"); - /** Describes precisely all strings that are encodable in alphanumeric mode. To test whether a - * string {@code s} is encodable: {@code boolean ok = ALPHANUMERIC_REGEX.matcher(s).matches();}. - * A string is encodable iff each character is in the following set: 0 to 9, A to Z - * (uppercase only), space, dollar, percent, asterisk, plus, hyphen, period, slash, colon. - * @see #makeAlphanumeric(String) */ - public static final Pattern ALPHANUMERIC_REGEX = Pattern.compile("[A-Z0-9 $%*+./:-]*"); + // Describes precisely all strings that are encodable in alphanumeric mode. + private static final Pattern ALPHANUMERIC_REGEX = Pattern.compile("[A-Z0-9 $%*+./:-]*"); // The set of all legal characters in alphanumeric mode, where // each character value maps to the index in the string. diff --git a/python/qrcodegen.py b/python/qrcodegen.py index ef9eb05..dba11ca 100644 --- a/python/qrcodegen.py +++ b/python/qrcodegen.py @@ -689,7 +689,7 @@ class QrSegment: @staticmethod def make_numeric(digits: str) -> QrSegment: """Returns a segment representing the given string of decimal digits encoded in numeric mode.""" - if QrSegment.NUMERIC_REGEX.fullmatch(digits) is None: + if not QrSegment.is_numeric(digits): raise ValueError("String contains non-numeric characters") bb = _BitBuffer() i: int = 0 @@ -705,7 +705,7 @@ class QrSegment: """Returns a segment representing the given text string encoded in alphanumeric mode. The characters allowed are: 0 to 9, A to Z (uppercase only), space, dollar, percent, asterisk, plus, hyphen, period, slash, colon.""" - if QrSegment.ALPHANUMERIC_REGEX.fullmatch(text) is None: + if not QrSegment.is_alphanumeric(text): raise ValueError("String contains unencodable characters in alphanumeric mode") bb = _BitBuffer() for i in range(0, len(text) - 1, 2): # Process groups of 2 @@ -727,9 +727,9 @@ class QrSegment: # Select the most efficient segment encoding automatically if text == "": return [] - elif QrSegment.NUMERIC_REGEX.fullmatch(text) is not None: + elif QrSegment.is_numeric(text): return [QrSegment.make_numeric(text)] - elif QrSegment.ALPHANUMERIC_REGEX.fullmatch(text) is not None: + elif QrSegment.is_alphanumeric(text): return [QrSegment.make_alphanumeric(text)] else: return [QrSegment.make_bytes(text.encode("UTF-8"))] @@ -755,6 +755,21 @@ class QrSegment: return QrSegment(QrSegment.Mode.ECI, 0, bb) + # Tests whether the given string can be encoded as a segment in numeric mode. + # A string is encodable iff each character is in the range 0 to 9. + @staticmethod + def is_numeric(text: str) -> bool: + return QrSegment._NUMERIC_REGEX.fullmatch(text) is not None + + + # Tests whether the given string can be encoded as a segment in alphanumeric mode. + # A string is encodable iff each character is in the following set: 0 to 9, A to Z + # (uppercase only), space, dollar, percent, asterisk, plus, hyphen, period, slash, colon. + @staticmethod + def is_alphanumeric(text: str) -> bool: + return QrSegment._ALPHANUMERIC_REGEX.fullmatch(text) is not None + + # ---- Private fields ---- # The mode indicator of this segment. Accessed through get_mode(). @@ -817,18 +832,13 @@ class QrSegment: # ---- Constants ---- - # (Public) Describes precisely all strings that are encodable in numeric mode. - # To test whether a string s is encodable: ok = NUMERIC_REGEX.fullmatch(s) is not None - # A string is encodable iff each character is in the range 0 to 9. - NUMERIC_REGEX: re.Pattern = re.compile(r"[0-9]*") + # Describes precisely all strings that are encodable in numeric mode. + _NUMERIC_REGEX: re.Pattern = re.compile(r"[0-9]*") - # (Public) Describes precisely all strings that are encodable in alphanumeric mode. - # To test whether a string s is encodable: ok = ALPHANUMERIC_REGEX.fullmatch(s) is not None - # A string is encodable iff each character is in the following set: 0 to 9, A to Z - # (uppercase only), space, dollar, percent, asterisk, plus, hyphen, period, slash, colon. - ALPHANUMERIC_REGEX: re.Pattern = re.compile(r"[A-Z0-9 $%*+./:-]*") + # Describes precisely all strings that are encodable in alphanumeric mode. + _ALPHANUMERIC_REGEX: re.Pattern = re.compile(r"[A-Z0-9 $%*+./:-]*") - # (Private) Dictionary of "0"->0, "A"->10, "$"->37, etc. + # Dictionary of "0"->0, "A"->10, "$"->37, etc. _ALPHANUMERIC_ENCODING_TABLE: Dict[str,int] = {ch: i for (i, ch) in enumerate("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ $%*+-./:")} diff --git a/typescript-javascript/qrcodegen.ts b/typescript-javascript/qrcodegen.ts index dece093..65595ea 100644 --- a/typescript-javascript/qrcodegen.ts +++ b/typescript-javascript/qrcodegen.ts @@ -756,7 +756,7 @@ namespace qrcodegen { // Returns a segment representing the given string of decimal digits encoded in numeric mode. public static makeNumeric(digits: string): QrSegment { - if (!this.NUMERIC_REGEX.test(digits)) + if (!QrSegment.isNumeric(digits)) throw "String contains non-numeric characters"; let bb: Array = [] for (let i = 0; i < digits.length; ) { // Consume up to 3 digits per iteration @@ -772,7 +772,7 @@ namespace qrcodegen { // The characters allowed are: 0 to 9, A to Z (uppercase only), space, // dollar, percent, asterisk, plus, hyphen, period, slash, colon. public static makeAlphanumeric(text: string): QrSegment { - if (!this.ALPHANUMERIC_REGEX.test(text)) + if (!QrSegment.isAlphanumeric(text)) throw "String contains unencodable characters in alphanumeric mode"; let bb: Array = [] let i: int; @@ -793,9 +793,9 @@ namespace qrcodegen { // Select the most efficient segment encoding automatically if (text == "") return []; - else if (this.NUMERIC_REGEX.test(text)) + else if (QrSegment.isNumeric(text)) return [QrSegment.makeNumeric(text)]; - else if (this.ALPHANUMERIC_REGEX.test(text)) + else if (QrSegment.isAlphanumeric(text)) return [QrSegment.makeAlphanumeric(text)]; else return [QrSegment.makeBytes(QrSegment.toUtf8ByteArray(text))]; @@ -822,6 +822,21 @@ namespace qrcodegen { } + // Tests whether the given string can be encoded as a segment in numeric mode. + // A string is encodable iff each character is in the range 0 to 9. + public static isNumeric(text: string): boolean { + return QrSegment.NUMERIC_REGEX.test(text); + } + + + // Tests whether the given string can be encoded as a segment in alphanumeric mode. + // A string is encodable iff each character is in the following set: 0 to 9, A to Z + // (uppercase only), space, dollar, percent, asterisk, plus, hyphen, period, slash, colon. + public static isAlphanumeric(text: string): boolean { + return QrSegment.ALPHANUMERIC_REGEX.test(text); + } + + /*-- Constructor (low level) and fields --*/ // Creates a new QR Code segment with the given attributes and data. @@ -885,16 +900,11 @@ namespace qrcodegen { /*-- Constants --*/ - // Describes precisely all strings that are encodable in numeric mode. To test - // whether a string s is encodable: let ok: boolean = NUMERIC_REGEX.test(s); - // A string is encodable iff each character is in the range 0 to 9. - public static readonly NUMERIC_REGEX: RegExp = /^[0-9]*$/; + // Describes precisely all strings that are encodable in numeric mode. + private static readonly NUMERIC_REGEX: RegExp = /^[0-9]*$/; - // Describes precisely all strings that are encodable in alphanumeric mode. To test - // whether a string s is encodable: let ok: boolean = ALPHANUMERIC_REGEX.test(s); - // A string is encodable iff each character is in the following set: 0 to 9, A to Z - // (uppercase only), space, dollar, percent, asterisk, plus, hyphen, period, slash, colon. - public static readonly ALPHANUMERIC_REGEX: RegExp = /^[A-Z0-9 $%*+.\/:-]*$/; + // Describes precisely all strings that are encodable in alphanumeric mode. + private static readonly ALPHANUMERIC_REGEX: RegExp = /^[A-Z0-9 $%*+.\/:-]*$/; // The set of all legal characters in alphanumeric mode, // where each character value maps to the index in the string.