From 19c7b69fbb822f27268eedb8e9b09a4bc7c00435 Mon Sep 17 00:00:00 2001 From: lalalaurentiu Date: Thu, 30 Oct 2025 16:31:45 +0200 Subject: [PATCH] Add country field to optional keys in scraper validation and update job data structure for 1and1 site --- __test__/runTest.py | 2 +- sites/1and1.py | 52 ++++++++++++++++++++++++++++++--------------- 2 files changed, 36 insertions(+), 18 deletions(-) diff --git a/__test__/runTest.py b/__test__/runTest.py index c8d8833..6b5cf81 100644 --- a/__test__/runTest.py +++ b/__test__/runTest.py @@ -56,7 +56,7 @@ # Define required and optional keys required_keys = ['company', 'job_title', 'job_link'] - optional_keys = ['city', 'county', 'remote'] + optional_keys = ['city', 'county', 'remote', 'country'] allowed_keys = required_keys + optional_keys # Define allowed remote values diff --git a/sites/1and1.py b/sites/1and1.py index 597803c..5fb7a7b 100644 --- a/sites/1and1.py +++ b/sites/1and1.py @@ -1,36 +1,54 @@ -from scraper_peviitor import Scraper -from utils import translate_city, publish_logo, show_jobs, publish_or_update, get_jobtype +from scraper.Scraper import Scraper +from utils import publish_or_update, publish_logo, create_job, show_jobs, translate_city, get_jobtype from getCounty import GetCounty -import re url = "https://www.1and1.ro/jobs.json" - -company = {"company": "1and1"} +company = {"company": "ionosgroup"} + +data = { + "tx_cisocareer_jobsearchform[__referrer][@extension]": "CisoCareer", + "tx_cisocareer_jobsearchform[__referrer][@controller]": "JobOffer", + "tx_cisocareer_jobsearchform[__referrer][@action]": "find", + "tx_cisocareer_jobsearchform[__referrer][arguments]": "YTo2OntzOjEwOiJ0ZXh0U2VhcmNoIjtzOjA6IiI7czo4OiJjYXRlZ29yeSI7czowOiIiO3M6MTE6ImNhcmVlckxldmVsIjtzOjA6IiI7czoxNjoibW9kZU9mRW1wbG95bWVudCI7czowOiIiO3M6MTI6ImxvY2F0aW9uTmFtZSI7czo2OiJiZXJsaW4iO3M6MTE6ImNvbXBhbnlOYW1lIjtzOjA6IiI7fQ == 7e81641e41d3a9f030f8a847b0221d4511d0ae3b", + "tx_cisocareer_jobsearchform[__referrer][@request]": '{"@extension": "CisoCareer", "@controller": "JobOffer", "@action": "find"}70ddfbe9634ffb03829ec95fe9baf5eaf3c478ff', + "tx_cisocareer_jobsearchform[__trustedProperties]": '{"textSearch": 1, "category": 1, "careerLevel": 1, "modeOfEmployment": 1, "locationName": 1, "companyName": 1}8458397472593a1c8ae53526144be7905bfc001e', + "tx_cisocareer_jobsearchform[textSearch]": "", + "tx_cisocareer_jobsearchform[category]": "", + "tx_cisocareer_jobsearchform[careerLevel]": "", + "tx_cisocareer_jobsearchform[modeOfEmployment]": "", + "tx_cisocareer_jobsearchform[locationName]": "bucharest", + "tx_cisocareer_jobsearchform[companyName]": "", +} scraper = Scraper() -scraper.url = url +scraper.set_headers( + { + + "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8", + } +) -jobs = scraper.getJson().get("jobs") +html = scraper.post(url, data=data) +scraper.__init__(html.text, "html.parser") +jobs_elements = scraper.find_all("a", class_="joboffer-list-job") -remote_pattern = re.compile(r"\(.+\)") -_counties = GetCounty() finalJobs = [ { - "job_title": job.get("JobTitle"), - "job_link": "https://www.1and1.ro/careers/" + job.get("RefURL"), - "country": "Romania", - "city": translate_city(job.get("Location")), - "county": _counties.get_county(translate_city(job.get("Location"))), - "remote": get_jobtype(job.get("JobTitle")), + "job_title": job.find("h4").text.strip(), + "job_link": "https://www.ionos-group.com" + job["href"], + "city": "Bucuresti", + "county": "Bucuresti", + "remote": get_jobtype(job.find("h4").text.strip()), "company": company.get("company"), + "country": "Romania", } - for job in jobs + for job in jobs_elements ] publish_or_update(finalJobs) -logoUrl = "https://cdn.website-editor.net/b236a61347464e4b904f5e6b661c2af9/dms3rep/multi/1and1-logo.svg" +logoUrl = "https://www.ionos-group.com/_assets/debf05b51933fca5f1c347f2aabc0cf0/Media/ionos-group.svg" publish_logo(company.get("company"), logoUrl) show_jobs(finalJobs)