@@ -1,7 +1,7 @@
 import logging
 import os
 from typing import Any
-from urllib.parse import urlparse
+from urllib.parse import urlparse, parse_qsl, urlencode, urlunparse
 import uuid
 import requests
 from tqdm import tqdm
@@ -18,7 +18,8 @@ class VulnCheckKevManager(CISAKevManager, relationship_note="cve-vulncheck-kev")
1818 """
1919 content_fmt = "Vulncheck KEV: {cve_id}"
2020 CHUNK_SIZE = 1500
21- UPLOAD_CHUNK_SIZE = 2500
21+ UPLOAD_CHUNK_SIZE = 500
22+ UPDATE_CHUNK_SIZE = 500
2223
2324 def __init__ (self , * args , ** kwargs ):
2425 super ().__init__ (* args , ** kwargs )
@@ -61,14 +62,27 @@ def get_all_kevs(self):
6162 if meta ["last_item" ] >= meta ["total_documents" ]:
6263 break
6364
65+ @staticmethod
66+ def sanitize_url (url ):
67+ """ this function removes #frgments and day=/date= queries from the link"""
+        parsed = urlparse(url)
+        qs = parse_qsl(parsed.query, keep_blank_values=True)
+        filtered = [(k, v) for (k, v) in qs if k.lower() not in ("day", "date")]
+        new_query = urlencode(filtered, doseq=True)
+        new_parsed = parsed._replace(query=new_query, fragment="")
+        return urlunparse(new_parsed)
+
     def get_additional_refs(self, kev_obj):
-        for reported in kev_obj["vulncheck_reported_exploitation"]:
+        refs = {}
+        for reported in sorted(kev_obj["vulncheck_reported_exploitation"], key=lambda x: x["date_added"]):
+            ref_url = self.sanitize_url(reported["url"])
             ref = dict(
-                url=reported["url"],
+                url=ref_url,
                 description=f"Added on: {reported['date_added']}",
                 source_name=urlparse(reported["url"]).hostname,
             )
-            yield ref
+            refs[ref_url] = ref
+        return reversed(refs.values())  # return in descending date order (newest first)
 
     def get_dates(self, cve):
         kev_obj = cve['kev']
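
As a quick sanity check of the new helper (not part of the diff; the module path and URL below are made up for illustration), this sketch shows the normalization sanitize_url is meant to perform:

# Hypothetical usage sketch; the "vulncheck_kev" import path and URL are assumptions.
from vulncheck_kev import VulnCheckKevManager

url = "https://example.com/advisory?id=42&day=2024-01-05#details"
print(VulnCheckKevManager.sanitize_url(url))
# prints: https://example.com/advisory?id=42  (fragment and day= parameter stripped)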