Skip to content

Commit c490f8b

Browse files
authored
remove duplicate urls in vulncheck-kev.external_references (#114)
* remove duplicate urls in vulncheck-kev.external_references #113 * fix test * bump
1 parent f2d04ce commit c490f8b

File tree

3 files changed

+29
-11
lines changed

3 files changed

+29
-11
lines changed

arango_cve_processor/managers/cve_kev_vulncheck.py

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import logging
22
import os
33
from typing import Any
4-
from urllib.parse import urlparse
4+
from urllib.parse import urlparse, parse_qsl, urlencode, urlunparse
55
import uuid
66
import requests
77
from tqdm import tqdm
@@ -18,7 +18,8 @@ class VulnCheckKevManager(CISAKevManager, relationship_note="cve-vulncheck-kev")
1818
"""
1919
content_fmt = "Vulncheck KEV: {cve_id}"
2020
CHUNK_SIZE = 1500
21-
UPLOAD_CHUNK_SIZE = 2500
21+
UPLOAD_CHUNK_SIZE = 500
22+
UPDATE_CHUNK_SIZE = 500
2223

2324
def __init__(self, *args, **kwargs):
2425
super().__init__(*args, **kwargs)
@@ -61,14 +62,27 @@ def get_all_kevs(self):
6162
if meta["last_item"] >= meta["total_documents"]:
6263
break
6364

65+
@staticmethod
def sanitize_url(url, drop_params=("day", "date")):
    """Return ``url`` without its fragment and without volatile query parameters.

    Links that differ only by a ``#fragment`` or by a ``day=``/``date=``
    query parameter are normalised to the same string so that callers can
    use the result as a de-duplication key.

    Args:
        url: The URL to normalise.
        drop_params: Query parameter names to remove. Names are matched
            case-insensitively. Defaults to ``("day", "date")``.

    Returns:
        The rebuilt URL. Remaining query parameters keep their relative
        order and blank values are preserved. The query string is
        re-serialised with ``urlencode``, so its percent-encoding may be
        normalised compared to the input.
    """
    dropped = {name.lower() for name in drop_params}
    parsed = urlparse(url)
    kept = [
        (key, value)
        for key, value in parse_qsl(parsed.query, keep_blank_values=True)
        if key.lower() not in dropped
    ]
    return urlunparse(parsed._replace(query=urlencode(kept), fragment=""))
74+
6475
def get_additional_refs(self, kev_obj):
    """Build de-duplicated external references from reported exploitations.

    Each entry of ``kev_obj["vulncheck_reported_exploitation"]`` is turned
    into a reference dict with ``url``, ``description`` and ``source_name``.
    Entries whose sanitized URL (see ``self.sanitize_url``) is identical
    are collapsed into one reference, keeping the most recent
    ``date_added``.

    Args:
        kev_obj: Mapping with a ``vulncheck_reported_exploitation`` list;
            every item must provide ``url`` and ``date_added``.

    Returns:
        An iterator over the reference dicts, ordered by ``date_added``
        descending (newest first).
    """
    by_date = sorted(
        kev_obj["vulncheck_reported_exploitation"],
        key=lambda item: item["date_added"],
    )
    refs = {}
    # Walk newest-first and keep only the first sighting of each sanitized
    # URL. Overwriting an existing dict key would keep the key at its
    # *oldest* insertion slot and break the descending order of the result.
    for reported in reversed(by_date):
        ref_url = self.sanitize_url(reported["url"])
        if ref_url in refs:
            continue
        refs[ref_url] = dict(
            url=ref_url,
            description=f"Added on: {reported['date_added']}",
            source_name=urlparse(reported["url"]).hostname,
        )
    return iter(refs.values())
7286

7387
def get_dates(self, cve):
7488
kev_obj = cve['kev']

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
44

55
[project]
66
name = "arango_cve_processor"
7-
version = "1.4.5"
7+
version = "1.4.6"
88
authors = [
99
{ name = "dogesec" }
1010
]

tests/unit/managers/test_cve_kev_vulncheck.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,10 @@ def test_relate_single(vulncheck_kev_manager, patched_retriever):
4242
}
4343
],
4444
"vulncheck_reported_exploitation": [
45+
{
46+
"url": "https://dashboard.shadowserver.org/statistics/honeypot/vulnerability/map/?day=2025-11-29&host_type=src&vulnerability=cve-2024-53704",
47+
"date_added": "2025-11-29T00:00:00Z",
48+
},
4549
{
4650
"url": "https://www.cisa.gov/sites/default/files/feeds/known_exploited_vulnerabilities.json",
4751
"date_added": "2025-02-18T00:00:00Z",
@@ -97,16 +101,16 @@ def test_relate_single(vulncheck_kev_manager, patched_retriever):
97101
"description": "Apply mitigations per vendor instructions or discontinue use of the product if mitigations are unavailable.",
98102
},
99103
{"source_name": "action_due", "description": "2025-03-11T00:00:00Z"},
104+
{
105+
"url": "https://dashboard.shadowserver.org/statistics/honeypot/vulnerability/map/?host_type=src&vulnerability=cve-2024-53704",
106+
"description": "Added on: 2025-11-29T00:00:00Z",
107+
"source_name": "dashboard.shadowserver.org",
108+
},
100109
{
101110
"url": "https://www.cisa.gov/sites/default/files/feeds/known_exploited_vulnerabilities.json",
102111
"description": "Added on: 2025-02-18T00:00:00Z",
103112
"source_name": "www.cisa.gov",
104113
},
105-
{
106-
"url": "https://dashboard.shadowserver.org/statistics/honeypot/vulnerability/map/?day=2025-02-24&host_type=src&vulnerability=cve-2024-53704",
107-
"description": "Added on: 2025-02-24T00:00:00Z",
108-
"source_name": "dashboard.shadowserver.org",
109-
},
110114
{
111115
"source_name": "cwe",
112116
"url": "http://cwe.mitre.org/data/definitions/94.html",

0 commit comments

Comments
 (0)