From 4fda7e7e748902d3ceee89a4f191c0a9c62b1d64 Mon Sep 17 00:00:00 2001 From: Tim Rogers Date: Wed, 26 Mar 2025 19:37:59 -0500 Subject: [PATCH 1/6] Updated ostatus_follow_request acct: regex to comply with RFC 7565 allowed characters --- bookwyrm/views/follow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bookwyrm/views/follow.py b/bookwyrm/views/follow.py index dcb1c695cd..b40eeeb8be 100644 --- a/bookwyrm/views/follow.py +++ b/bookwyrm/views/follow.py @@ -135,7 +135,7 @@ def ostatus_follow_request(request): """prepare an outgoing remote follow request""" uri = urllib.parse.unquote(request.GET.get("acct")) username_parts = re.search( - r"(?:^http(?:s?):\/\/)([\w\-\.]*)(?:.)*(?:(?:\/)([\w]*))", uri + r"(?:^http(?:s?):\/\/)([\w\-\.]*)(?:.)*(?:(?:\/)([A-Za-z0-9\-\.\_\~\!\$\&\'\(\)\*\+\,\;\=](?:[A-Za-z0-9\-\.\_\~\!\$\&\'\(\)\*\+\,\;\=]|(?:%[0-9A-Fa-f]{2}))*))", uri ) account = f"{username_parts[2]}@{username_parts[1]}" user = handle_remote_webfinger(account) From d83c767b5d7a7412adaaf30590728d01ff0a81c2 Mon Sep 17 00:00:00 2001 From: Tim Rogers Date: Wed, 26 Mar 2025 19:41:04 -0500 Subject: [PATCH 2/6] Fixed format error --- bookwyrm/views/follow.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bookwyrm/views/follow.py b/bookwyrm/views/follow.py index b40eeeb8be..944083b28a 100644 --- a/bookwyrm/views/follow.py +++ b/bookwyrm/views/follow.py @@ -135,7 +135,8 @@ def ostatus_follow_request(request): """prepare an outgoing remote follow request""" uri = urllib.parse.unquote(request.GET.get("acct")) username_parts = re.search( - r"(?:^http(?:s?):\/\/)([\w\-\.]*)(?:.)*(?:(?:\/)([A-Za-z0-9\-\.\_\~\!\$\&\'\(\)\*\+\,\;\=](?:[A-Za-z0-9\-\.\_\~\!\$\&\'\(\)\*\+\,\;\=]|(?:%[0-9A-Fa-f]{2}))*))", uri + r"(?:^http(?:s?):\/\/)([\w\-\.]*)(?:.)*(?:(?:\/)([A-Za-z0-9\-\.\_\~\!\$\&\'\(\)\*\+\,\;\=](?:[A-Za-z0-9\-\.\_\~\!\$\&\'\(\)\*\+\,\;\=]|(?:%[0-9A-Fa-f]{2}))*))", + uri, ) account = f"{username_parts[2]}@{username_parts[1]}" user = 
handle_remote_webfinger(account) From 024807b260e7dc3c6b71039b0333cc465c5f76f9 Mon Sep 17 00:00:00 2001 From: Tim Rogers Date: Sun, 30 Mar 2025 09:17:30 -0500 Subject: [PATCH 3/6] Moved remote user URL into regex utility --- bookwyrm/utils/regex.py | 2 ++ bookwyrm/views/follow.py | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/bookwyrm/utils/regex.py b/bookwyrm/utils/regex.py index 98bcde5ad7..f7a576efb8 100644 --- a/bookwyrm/utils/regex.py +++ b/bookwyrm/utils/regex.py @@ -3,9 +3,11 @@ DOMAIN = r"[\w_\-\.]+\.[a-z\-]{2,}" LOCALNAME = r"@?[a-zA-Z_\-\.0-9]+" STRICT_LOCALNAME = r"@[a-zA-Z_\-\.0-9]+" +REMOTENAME = r"[A-Za-z0-9\-\.\_\~\!\$\&\'\(\)\*\+\,\;\=](?:[A-Za-z0-9\-\.\_\~\!\$\&\'\(\)\*\+\,\;\=]|(?:%[0-9A-Fa-f]{2})){0,149}" USERNAME = rf"{LOCALNAME}(@{DOMAIN})?" STRICT_USERNAME = rf"(\B{STRICT_LOCALNAME}(@{DOMAIN})?\b)" FULL_USERNAME = rf"{LOCALNAME}@{DOMAIN}\b" +REMOTE_USER_URL = rf"(?:^http(?:s?):\/\/)([\w\-\.]*)(?:.)*(?:(?:\/)({REMOTENAME}))" SLUG = r"/s/(?P<slug>[-_a-z0-9]*)" HASHTAG = r"(#[^!@#$%^&*(),.?\":{}|<>\s]+)" # should match (BookWyrm/1.0.0; or (BookWyrm/99.1.2; diff --git a/bookwyrm/views/follow.py b/bookwyrm/views/follow.py index 944083b28a..5001ca9174 100644 --- a/bookwyrm/views/follow.py +++ b/bookwyrm/views/follow.py @@ -10,6 +10,7 @@ from bookwyrm import models from bookwyrm.models.relationship import clear_cache +from bookwyrm.utils import regex from .helpers import ( get_user_from_username, handle_remote_webfinger, @@ -135,7 +136,7 @@ def ostatus_follow_request(request): """prepare an outgoing remote follow request""" uri = urllib.parse.unquote(request.GET.get("acct")) username_parts = re.search( - r"(?:^http(?:s?):\/\/)([\w\-\.]*)(?:.)*(?:(?:\/)([A-Za-z0-9\-\.\_\~\!\$\&\'\(\)\*\+\,\;\=](?:[A-Za-z0-9\-\.\_\~\!\$\&\'\(\)\*\+\,\;\=]|(?:%[0-9A-Fa-f]{2}))*))", + regex.REMOTE_USER_URL, uri, ) account = f"{username_parts[2]}@{username_parts[1]}" From dc31e7698d37dfe30ab6e0d7f816838f86a2d998 Mon Sep 17 00:00:00 2001 From: Tim Rogers 
Date: Sun, 30 Mar 2025 09:31:35 -0500 Subject: [PATCH 4/6] Condensed REMOTENAME regex literals --- bookwyrm/utils/regex.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bookwyrm/utils/regex.py b/bookwyrm/utils/regex.py index f7a576efb8..3c89a71327 100644 --- a/bookwyrm/utils/regex.py +++ b/bookwyrm/utils/regex.py @@ -3,7 +3,7 @@ DOMAIN = r"[\w_\-\.]+\.[a-z\-]{2,}" LOCALNAME = r"@?[a-zA-Z_\-\.0-9]+" STRICT_LOCALNAME = r"@[a-zA-Z_\-\.0-9]+" -REMOTENAME = r"[A-Za-z0-9\-\.\_\~\!\$\&\'\(\)\*\+\,\;\=](?:[A-Za-z0-9\-\.\_\~\!\$\&\'\(\)\*\+\,\;\=]|(?:%[0-9A-Fa-f]{2})){0,149}" +REMOTENAME = r"[\w\-\.\~\!\$\&\'\(\)\*\+\,\;\=](?:[\w\-\.\~\!\$\&\'\(\)\*\+\,\;\=]|(?:%[0-9A-Fa-f]{2})){0,149}" USERNAME = rf"{LOCALNAME}(@{DOMAIN})?" STRICT_USERNAME = rf"(\B{STRICT_LOCALNAME}(@{DOMAIN})?\b)" FULL_USERNAME = rf"{LOCALNAME}@{DOMAIN}\b" From c9406c40a4d86e728bf9e8a8a45679017cca8ab1 Mon Sep 17 00:00:00 2001 From: Tim Rogers Date: Sun, 30 Mar 2025 09:45:23 -0500 Subject: [PATCH 5/6] Split long REMOTENAME regex into multiline string for readability --- bookwyrm/utils/regex.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/bookwyrm/utils/regex.py b/bookwyrm/utils/regex.py index 3c89a71327..9c2cb53176 100644 --- a/bookwyrm/utils/regex.py +++ b/bookwyrm/utils/regex.py @@ -3,7 +3,10 @@ DOMAIN = r"[\w_\-\.]+\.[a-z\-]{2,}" LOCALNAME = r"@?[a-zA-Z_\-\.0-9]+" STRICT_LOCALNAME = r"@[a-zA-Z_\-\.0-9]+" -REMOTENAME = r"[\w\-\.\~\!\$\&\'\(\)\*\+\,\;\=](?:[\w\-\.\~\!\$\&\'\(\)\*\+\,\;\=]|(?:%[0-9A-Fa-f]{2})){0,149}" +REMOTENAME = ( + r"[\w\-\.\~\!\$\&\'\(\)\*\+\,\;\=]" + r"(?:[\w\-\.\~\!\$\&\'\(\)\*\+\,\;\=]|(?:%[0-9A-Fa-f]{2})){0,149}" +) USERNAME = rf"{LOCALNAME}(@{DOMAIN})?" 
STRICT_USERNAME = rf"(\B{STRICT_LOCALNAME}(@{DOMAIN})?\b)" FULL_USERNAME = rf"{LOCALNAME}@{DOMAIN}\b" From 1d873a37cb8535532cd542efb9cef2a984566be8 Mon Sep 17 00:00:00 2001 From: Tim Rogers Date: Mon, 31 Mar 2025 07:13:10 -0500 Subject: [PATCH 6/6] Restricting REMOTENAME regex to just RFC 7565 characters that are URI safe --- bookwyrm/utils/regex.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/bookwyrm/utils/regex.py b/bookwyrm/utils/regex.py index 9c2cb53176..5d4bf77bf5 100644 --- a/bookwyrm/utils/regex.py +++ b/bookwyrm/utils/regex.py @@ -3,10 +3,7 @@ DOMAIN = r"[\w_\-\.]+\.[a-z\-]{2,}" LOCALNAME = r"@?[a-zA-Z_\-\.0-9]+" STRICT_LOCALNAME = r"@[a-zA-Z_\-\.0-9]+" -REMOTENAME = ( - r"[\w\-\.\~\!\$\&\'\(\)\*\+\,\;\=]" - r"(?:[\w\-\.\~\!\$\&\'\(\)\*\+\,\;\=]|(?:%[0-9A-Fa-f]{2})){0,149}" -) +REMOTENAME = r"[\w\-\.\~](?:[\w\-\.\~]|(?:%[0-9A-Fa-f]{2})){0,149}" USERNAME = rf"{LOCALNAME}(@{DOMAIN})?" STRICT_USERNAME = rf"(\B{STRICT_LOCALNAME}(@{DOMAIN})?\b)" FULL_USERNAME = rf"{LOCALNAME}@{DOMAIN}\b"