From 1bd68b33897706f052163f0fd36c5e6a645d3216 Mon Sep 17 00:00:00 2001 From: Evans Castonguay Date: Fri, 9 Jan 2026 22:16:58 -0500 Subject: [PATCH 1/4] fix: support custom GitLab host clone URLs --- pr_agent/git_providers/gitlab_provider.py | 44 +++++++++++++++-------- tests/unittest/test_gitlab_provider.py | 25 +++++++++++++ 2 files changed, 54 insertions(+), 15 deletions(-) diff --git a/pr_agent/git_providers/gitlab_provider.py b/pr_agent/git_providers/gitlab_provider.py index e9db1a3740..3233f2e3b8 100644 --- a/pr_agent/git_providers/gitlab_provider.py +++ b/pr_agent/git_providers/gitlab_provider.py @@ -954,22 +954,36 @@ def generate_link_to_relevant_line_number(self, suggestion) -> str: return "" #Clone related def _prepare_clone_url_with_token(self, repo_url_to_clone: str) -> str | None: - if "gitlab." not in repo_url_to_clone: - get_logger().error(f"Repo URL: {repo_url_to_clone} is not a valid gitlab URL.") - return None - (scheme, base_url) = repo_url_to_clone.split("gitlab.") access_token = getattr(self.gl, 'oauth_token', None) or getattr(self.gl, 'private_token', None) - if not all([scheme, access_token, base_url]): - get_logger().error(f"Either no access token found, or repo URL: {repo_url_to_clone} " - f"is missing prefix: {scheme} and/or base URL: {base_url}.") + if not access_token: + get_logger().error("No access token found for GitLab clone.") return None - #Note that the ""official"" method found here: - # https://docs.gitlab.com/user/profile/personal_access_tokens/#clone-repository-using-personal-access-token - # requires a username, which may not be applicable. - # The following solution is taken from: https://stackoverflow.com/questions/25409700/using-gitlab-token-to-clone-without-authentication/35003812#35003812 - # For example: For repo url: https://gitlab.codium-inc.com/qodo/autoscraper.git - # Then to clone one will issue: 'git clone https://oauth2:@gitlab.codium-inc.com/qodo/autoscraper.git' + # Note: GitLab instances are not always hosted under a gitlab.* domain. + # Build a clone URL that works with any host (e.g., gitlab.example.com). + if repo_url_to_clone.startswith(("http://", "https://")): + try: + from urllib.parse import urlparse + parsed = urlparse(repo_url_to_clone) + if not parsed.scheme or not parsed.netloc: + raise ValueError("missing scheme or host") + netloc = parsed.netloc.split("@")[-1] + return f"{parsed.scheme}://oauth2:{access_token}@{netloc}{parsed.path}" + except Exception as exc: + get_logger().error( + f"Repo URL: {repo_url_to_clone} could not be parsed for clone.", + artifact={"error": str(exc)}, + ) + return None - clone_url = f"{scheme}oauth2:{access_token}@gitlab.{base_url}" - return clone_url + # Fallback to legacy gitlab.* parsing when a raw URL is provided. + if "gitlab." not in repo_url_to_clone: + get_logger().error(f"Repo URL: {repo_url_to_clone} is not a valid gitlab URL.") + return None + scheme, base_url = repo_url_to_clone.split("gitlab.") + if not all([scheme, base_url]): + get_logger().error( + f"Repo URL: {repo_url_to_clone} is missing prefix: {scheme} and/or base URL: {base_url}." + ) + return None + return f"{scheme}oauth2:{access_token}@gitlab.{base_url}" diff --git a/tests/unittest/test_gitlab_provider.py b/tests/unittest/test_gitlab_provider.py index dadc0903cb..c893ffc53c 100644 --- a/tests/unittest/test_gitlab_provider.py +++ b/tests/unittest/test_gitlab_provider.py @@ -35,6 +35,7 @@ def gitlab_provider(self, mock_gitlab_client, mock_project): provider = GitLabProvider("https://gitlab.com/test/repo/-/merge_requests/1") provider.gl = mock_gitlab_client provider.id_project = "test/repo" + provider.gl.oauth_token = "fake_token" return provider def test_get_pr_file_content_success(self, gitlab_provider, mock_project): @@ -192,3 +193,27 @@ def test_compare_submodule_cached(self, gitlab_provider): assert first == second == [{"diff": "d"}] m_pbp.assert_called_once_with("grp/repo") proj.repository_compare.assert_called_once_with("old", "new") + + def test_prepare_clone_url_with_token_gitlab_com(self, gitlab_provider): + gitlab_provider.gl.oauth_token = "token123" + repo_url = "https://gitlab.com/group/repo.git" + + result = gitlab_provider._prepare_clone_url_with_token(repo_url) + + assert result == "https://oauth2:token123@gitlab.com/group/repo.git" + + def test_prepare_clone_url_with_token_custom_domain(self, gitlab_provider): + gitlab_provider.gl.oauth_token = "token123" + repo_url = "https://gitlab.example.com/group/repo.git" + + result = gitlab_provider._prepare_clone_url_with_token(repo_url) + + assert result == "https://oauth2:token123@gitlab.example.com/group/repo.git" + + def test_prepare_clone_url_with_token_invalid_url(self, gitlab_provider): + gitlab_provider.gl.oauth_token = "token123" + repo_url = "gitlab.example.com/group/repo.git" + + result = gitlab_provider._prepare_clone_url_with_token(repo_url) + + assert result is None From 2a2b92b44c47ad1cc5b6358ac5847e4851687e22 Mon Sep 17 00:00:00 2001 From: Evans Castonguay Date: Fri, 9 Jan 2026 23:12:10 -0500 Subject: [PATCH 2/4] fix: parse non-http GitLab clone URLs --- pr_agent/git_providers/gitlab_provider.py | 29 +++++++++++++++-------- tests/unittest/test_gitlab_provider.py | 16 +++++++++++++ 2 files changed, 35 insertions(+), 10 deletions(-) diff --git a/pr_agent/git_providers/gitlab_provider.py b/pr_agent/git_providers/gitlab_provider.py index 3233f2e3b8..795cff8779 100644 --- a/pr_agent/git_providers/gitlab_provider.py +++ b/pr_agent/git_providers/gitlab_provider.py @@ -969,21 +969,30 @@ def _prepare_clone_url_with_token(self, repo_url_to_clone: str) -> str | None: raise ValueError("missing scheme or host") netloc = parsed.netloc.split("@")[-1] return f"{parsed.scheme}://oauth2:{access_token}@{netloc}{parsed.path}" - except Exception as exc: + except Exception as e: get_logger().error( f"Repo URL: {repo_url_to_clone} could not be parsed for clone.", - artifact={"error": str(exc)}, + artifact={"error": str(e)}, ) return None - # Fallback to legacy gitlab.* parsing when a raw URL is provided. - if "gitlab." not in repo_url_to_clone: - get_logger().error(f"Repo URL: {repo_url_to_clone} is not a valid gitlab URL.") - return None - scheme, base_url = repo_url_to_clone.split("gitlab.") - if not all([scheme, base_url]): + # Fallback for non-HTTP URLs (e.g., ssh or scp-style). + try: + if "@" in repo_url_to_clone and ":" in repo_url_to_clone and not repo_url_to_clone.startswith("ssh://"): + # Handle SCP-like URLs: git@gitlab.com:group/repo.git + repo_url_to_clone = "ssh://" + repo_url_to_clone.replace(":", "/", 1) + + from urllib.parse import urlparse + parsed = urlparse(repo_url_to_clone) + if not parsed.netloc: + raise ValueError("missing host") + + netloc = parsed.netloc.split("@")[-1] + scheme = parsed.scheme if parsed.scheme else "https" + return f"{scheme}://oauth2:{access_token}@{netloc}{parsed.path}" + except Exception as e: get_logger().error( - f"Repo URL: {repo_url_to_clone} is missing prefix: {scheme} and/or base URL: {base_url}." + f"Repo URL: {repo_url_to_clone} could not be parsed for clone.", + artifact={"error": str(e)}, ) return None - return f"{scheme}oauth2:{access_token}@gitlab.{base_url}" diff --git a/tests/unittest/test_gitlab_provider.py b/tests/unittest/test_gitlab_provider.py index c893ffc53c..f401e3ed70 100644 --- a/tests/unittest/test_gitlab_provider.py +++ b/tests/unittest/test_gitlab_provider.py @@ -217,3 +217,19 @@ def test_prepare_clone_url_with_token_invalid_url(self, gitlab_provider): result = gitlab_provider._prepare_clone_url_with_token(repo_url) assert result is None + + def test_prepare_clone_url_with_token_scp_style(self, gitlab_provider): + gitlab_provider.gl.oauth_token = "token123" + repo_url = "git@gitlab.example.com:group/repo.git" + + result = gitlab_provider._prepare_clone_url_with_token(repo_url) + + assert result == "ssh://oauth2:token123@gitlab.example.com/group/repo.git" + + def test_prepare_clone_url_with_token_ssh_url(self, gitlab_provider): + gitlab_provider.gl.oauth_token = "token123" + repo_url = "ssh://git@gitlab.example.com/group/repo.git" + + result = gitlab_provider._prepare_clone_url_with_token(repo_url) + + assert result == "ssh://oauth2:token123@gitlab.example.com/group/repo.git" From ca4882d3cec93fa40efb26c901776acff95cb90b Mon Sep 17 00:00:00 2001 From: Evans Castonguay Date: Fri, 9 Jan 2026 23:16:06 -0500 Subject: [PATCH 3/4] chore: reuse module urlparse in clone helper --- pr_agent/git_providers/gitlab_provider.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pr_agent/git_providers/gitlab_provider.py b/pr_agent/git_providers/gitlab_provider.py index 795cff8779..96eaa37d3e 100644 --- a/pr_agent/git_providers/gitlab_provider.py +++ b/pr_agent/git_providers/gitlab_provider.py @@ -963,7 +963,6 @@ def _prepare_clone_url_with_token(self, repo_url_to_clone: str) -> str | None: # Build a clone URL that works with any host (e.g., gitlab.example.com). if repo_url_to_clone.startswith(("http://", "https://")): try: - from urllib.parse import urlparse parsed = urlparse(repo_url_to_clone) if not parsed.scheme or not parsed.netloc: raise ValueError("missing scheme or host") @@ -982,7 +981,6 @@ def _prepare_clone_url_with_token(self, repo_url_to_clone: str) -> str | None: # Handle SCP-like URLs: git@gitlab.com:group/repo.git repo_url_to_clone = "ssh://" + repo_url_to_clone.replace(":", "/", 1) - from urllib.parse import urlparse parsed = urlparse(repo_url_to_clone) if not parsed.netloc: raise ValueError("missing host") From 8260c1bddd2a3c8faafb58edf62dfa0f1363efa8 Mon Sep 17 00:00:00 2001 From: Evans Castonguay Date: Fri, 9 Jan 2026 23:20:33 -0500 Subject: [PATCH 4/4] refactor: guard missing GitLab clone token --- pr_agent/git_providers/gitlab_provider.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pr_agent/git_providers/gitlab_provider.py b/pr_agent/git_providers/gitlab_provider.py index 96eaa37d3e..0578c58345 100644 --- a/pr_agent/git_providers/gitlab_provider.py +++ b/pr_agent/git_providers/gitlab_provider.py @@ -956,8 +956,7 @@ def generate_link_to_relevant_line_number(self, suggestion) -> str: def _prepare_clone_url_with_token(self, repo_url_to_clone: str) -> str | None: access_token = getattr(self.gl, 'oauth_token', None) or getattr(self.gl, 'private_token', None) if not access_token: - get_logger().error("No access token found for GitLab clone.") - return None + return self._log_missing_clone_token() # Note: GitLab instances are not always hosted under a gitlab.* domain. # Build a clone URL that works with any host (e.g., gitlab.example.com). @@ -994,3 +993,8 @@ def _prepare_clone_url_with_token(self, repo_url_to_clone: str) -> str | None: artifact={"error": str(e)}, ) return None + + @staticmethod + def _log_missing_clone_token() -> None: + get_logger().error("No access token found for GitLab clone.") + return None