From 3d850d762c66690e5954ea54f37adb00a2a36bae Mon Sep 17 00:00:00 2001 From: lalalaurentiu Date: Mon, 1 Sep 2025 13:50:47 +0300 Subject: [PATCH 1/2] Pass verify=False (verification disabled) to URL requests in Infineon scraper --- sites/infineon.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sites/infineon.py b/sites/infineon.py index 1c0dae2..521c555 100644 --- a/sites/infineon.py +++ b/sites/infineon.py @@ -14,7 +14,7 @@ scraper = Scraper() -scraper.get_from_url(url, "JSON") +scraper.get_from_url(url=url,type= "JSON", verify=False) jobs = scraper.markup.get("positions") while len(jobs) > 0: @@ -37,7 +37,7 @@ start += 10 url = f"https://jobs.infineon.com/api/apply/v2/jobs?domain=infineon.com&start={start}&num={num}&location=Romania&pid=563808958979269&domain=infineon.com&sort_by=relevance&triggerGoButton=false" - scraper.get_from_url(url, "JSON") + scraper.get_from_url(url=url,type= "JSON", verify=False) jobs = scraper.markup.get("positions") publish_or_update(finaljobs) From a7203dda31c8878824d8d873295a55fd591ac81c Mon Sep 17 00:00:00 2001 From: lalalaurentiu Date: Fri, 5 Sep 2025 01:18:34 +0300 Subject: [PATCH 2/2] Update Primark scraper URL for job listings and adjust job link construction --- sites/primark.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sites/primark.py b/sites/primark.py index a42493d..f39eb32 100644 --- a/sites/primark.py +++ b/sites/primark.py @@ -3,7 +3,7 @@ from getCounty import GetCounty _counties = GetCounty() -url = "https://ro.cariera.primark.com/loc/romania-posturi-vacante/39017/798549/2" +url = 
"https://careers.primark.com/ro/search-jobs/results?ActiveFacetID=0&CurrentPage=1&RecordsPerPage=10&TotalContentResults=&Distance=50&RadiusUnitType=0&Keywords=&Location=&ShowRadius=False&IsPagination=False&CustomFacetName=&FacetTerm=798549&FacetType=2&FacetFilters%5B0%5D.ID=798549&FacetFilters%5B0%5D.FacetType=2&FacetFilters%5B0%5D.Count=3&FacetFilters%5B0%5D.Display=Rom%C3%A2nia&FacetFilters%5B0%5D.IsApplied=true&FacetFilters%5B0%5D.FieldName=&SearchResultsModuleName=Search+Results&SearchFiltersModuleName=Search+Results+Filters&SortCriteria=0&SortDirection=0&SearchType=3&OrganizationIds=8171&PostalCode=&ResultsType=0&fc=&fl=&fcf=&afc=&afl=&afcf=&TotalContentPages=NaN" company = {"company": "Primark"} finalJobs = list() @@ -13,13 +13,15 @@ "Accept-Language": "en-GB,en;q=0.9", }) -scraper.get_from_url(url, verify=False) +scraper.get_from_url(url,"JSON" ,verify=False) + +scraper.__init__(scraper.markup.get("results"), "html.parser") jobs = scraper.find("section", {"id": "search-results-list"}).find_all("li") for job in jobs: job_title = job.find("h3").text.strip() - job_link = "https://ro.cariera.primark.com" + job.find("a").get("href") + job_link = "https://careers.primark.com" + job.find("a").get("href") city = translate_city(job.find( "span", {"class": "job-list-info--location"}).text.split(",")[0].strip()) county = _counties.get_county(city)