From 580ece955f7e127094b21b40761be6b70ab62aef Mon Sep 17 00:00:00 2001 From: Kim Davies Date: Fri, 13 Sep 2024 19:19:32 -0700 Subject: [PATCH] Implement changes to UTS46 algorithm See https://www.unicode.org/reports/tr46/tr46-33.html#Validity_Criteria --- idna/core.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/idna/core.py b/idna/core.py index 9115f12..afe942c 100644 --- a/idna/core.py +++ b/idna/core.py @@ -9,7 +9,8 @@ _virama_combining_class = 9 _alabel_prefix = b"xn--" _unicode_dots_re = re.compile("[\u002e\u3002\uff0e\uff61]") - +_ldh = (48,49,50,51,52,53,54,55,56,57,95,97,98,99,100,101,102,103,104,105,106,107,108,109,110, + 111,112,113,114,115,116,117,118,119,120,121,122) class IDNAError(UnicodeError): """Base exception for all IDNA-encoding related problems""" @@ -341,16 +342,18 @@ def uts46_remap(domain: str, std3_rules: bool = True, transitional: bool = False uts46row = uts46data[code_point if code_point < 256 else bisect.bisect_left(uts46data, (code_point, "Z")) - 1] status = uts46row[1] replacement: Optional[str] = None + if std3_rules and code_point <= 0x7f: + if not code_point in _ldh: + raise InvalidCodepoint("Codepoint {} at position {} does not follow STD3 rules".format(_unot(code_point), pos + 1)) if len(uts46row) == 3: replacement = uts46row[2] if ( status == "V" or (status == "D" and not transitional) - or (status == "3" and not std3_rules and replacement is None) ): output += char elif replacement is not None and ( - status == "M" or (status == "3" and not std3_rules) or (status == "D" and transitional) + status == "M" or (status == "D" and transitional) ): output += replacement elif status != "I":