diff --git a/sites/atkinsrealis.py b/sites/atkinsrealis.py index d0316d0..aa967e5 100644 --- a/sites/atkinsrealis.py +++ b/sites/atkinsrealis.py @@ -1,61 +1,53 @@ from scraper.Scraper import Scraper from utils import publish_or_update, publish_logo, create_job, show_jobs, translate_city -from getCounty import GetCounty +import json +from math import ceil -_counties = GetCounty() company = "atkinsrealis" -url = "https://careers.atkinsrealis.com/jobs?options=,6477&page=" -page = 1 +url = "https://slihrms.wd3.myworkdayjobs.com/wday/cxs/slihrms/Careers/jobs" +post_data = {"appliedFacets": {"locations": [ + "a19c13ab2cba10a5238f4fb548d3bdf5"]}, "limit": 20, "offset": 0, "searchText": ""} scraper = Scraper() scraper.set_headers( - { + { + "content-type": "application/json", "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/ 91.0.4472.124 Safari/537.36", } ) -scraper.get_from_url(url + str(page), verify=False) +obj = scraper.post(url, json.dumps(post_data)) -jobs = [] -jobs_elements = scraper.find("div", class_="attrax-list-widget__lists").find_all( - "div", class_="attrax-vacancy-tile") +step = 20 +total_jobs = obj.json()["total"] +pages = ceil(total_jobs / step) -while True: - for job in jobs_elements: +jobs = [] - job_title = job.find( - "a", class_="attrax-vacancy-tile__title").text.strip() - job_link = ( - "https://careers.atkinsrealis.com" - + job.find("a", class_="attrax-vacancy-tile__title")["href"] - ) - city = translate_city( - job.find( - "div", class_="attrax-vacancy-tile__option-location-valueset" - ).text.strip() - ) +for page in range(0, pages): + if page > 0: + post_data["offset"] = page * step + obj = scraper.post(url, json.dumps(post_data)) - county = _counties.get_county(city) or [] + for job in obj.json()["jobPostings"]: + job_title = job["title"] + job_link = "https://slihrms.wd3.myworkdayjobs.com/en-US/Careers" + \ + job["externalPath"] + country = "Romania" + remote = [job.get("remoteType")] if 
job.get("remoteType") else [] jobs.append( create_job( job_title=job_title, job_link=job_link, - city=city, - county=county, - company=company, country="Romania", + city="Bucuresti", + county="Bucuresti", + company=company, + remote=remote, ) ) - page += 1 - scraper.get_from_url(url + str(page), verify=False) - try: - jobs_elements = scraper.find( - "div", class_="attrax-list-widget__lists" - ).find_all("div", class_="attrax-vacancy-tile") - except AttributeError: - break publish_or_update(jobs) diff --git a/sites/goodyear.py b/sites/goodyear.py index d2d3ee7..3e4e576 100644 --- a/sites/goodyear.py +++ b/sites/goodyear.py @@ -4,10 +4,10 @@ import json company = "GoodYear" -url = " https://goodyear.wd1.myworkdayjobs.com/wday/cxs/goodyear/GoodyearCareers/jobs" +url = "https://goodyear.wd1.myworkdayjobs.com/wday/cxs/goodyear/GoodyearCareers/jobs" -post_data = {"appliedFacets": {"locations": ["013bdadd1adf100168e66388c7390000", - "8e6033e1c034100168b80fcc8d420000"]}, "limit": 20, "offset": 0, "searchText": ""} +post_data = {"appliedFacets": {"locations": ["8e6033e1c034100168b80fcc8d420000", + "013bdadd1adf100168e66388c7390000"]}, "limit": 20, "offset": 0, "searchText": ""} headers = {"Content-Type": "application/json"} scraper = Scraper() @@ -30,7 +30,7 @@ job_link = "https://goodyear.wd1.myworkdayjobs.com/en-US/GoodyearCareers" + \ job["externalPath"] country = "Romania" - remote = [job.get("remoteType") if job.get("remoteType") else []] + remote = [job.get("remoteType")] if job.get("remoteType") else [] jobs.append( create_job( diff --git a/sites/hcltechnologies.py b/sites/hcltechnologies.py index a93f754..6367815 100644 --- a/sites/hcltechnologies.py +++ b/sites/hcltechnologies.py @@ -1,98 +1,53 @@ from scraper.Scraper import Scraper from utils import ( show_jobs, - translate_city, publish_or_update, publish_logo, acurate_city_and_county, ) -from getCounty import GetCounty +import json -_counties = GetCounty() company = "hcltechnologies" finalJobs = list() 
acurate_city = acurate_city_and_county(Iasi={"city": "Iasi", "county": "Iasi"}) -url = "https://www.hcltech.com/views/ajax?_wrapper_format=drupal_ajax&view_name=hcl_ers_career_jobs&view_display_id=block_1&view_args=romania_job&page=" +post_data = {"locale": "en_US", "pageNumber": 0, "sortBy": "", "keywords": "", "location": "", "facetFilters": { +}, "brand": "", "skills": [], "categoryId": 9556055, "alertId": "", "rcmCandidateId": ""} + +url = "https://careers.hcltech.com/services/recruiting/v1/jobs" pageNumber = 0 headers = { - "X-Requested-With": "XMLHttpRequest", + "Content-Type": "application/json", + "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.0 Safari/605.1.15" } scraper = Scraper() scraper.set_headers(headers) -scraper.get_from_url(url + str(pageNumber), "JSON") - -html = scraper.markup[3].get("data") - -scraper.__init__(html, "html.parser") - -jobs = ( - scraper.find("div", {"class": "view-hcl-ers-career-jobs"}) - .find("tbody") - .find_all("tr") -) - -while jobs: - for job in jobs: - job_title = job.find("td", {"class": "views-field-title"}).text.strip() - job_link = "https://www.hcltech.com" + job.find( - "td", {"class": "views-field-title"} - ).find("a").get("href") - location = job.find( - "td", {"class": "views-field-field-job-location"} - ).text.strip() - - city = [] - county = [] - remote = [] - - job_element = { - "job_title": job_title, - "job_link": job_link, - "company": company, - "country": "Romania", - } - if "Remote" in location: - remote.append("Remote") - elif "Hybrid" in location: - remote.append("Hybrid") - - if "(Bucharest/Iasi)" in location: - city = ["Bucuresti", "Iasi"] - county = ["Bucuresti", "Iasi"] - - else: - city = translate_city(location.split(" ")[0]) - if acurate_city.get(city): - city = acurate_city.get(city).get("city") - county = acurate_city.get(city).get("county") - else: - county = _counties.get_county(city) - - if not county: - city = [] - county = 
[] - job_element.update({"city": city, "county": county, "remote": remote}) +jobs = scraper.post(url, json.dumps(post_data)).json() + +total_jobs = jobs.get("totalJobs", 0) +pages = (total_jobs + 9) // 10 + +while pageNumber < pages: + for job in jobs.get("jobSearchResult", []): + obj = job.get("response") + job_title = obj.get("unifiedStandardTitle") + job_link = f"https://careers.hcltech.com/job/{obj.get('urlTitle')}/{obj.get('id')}-{obj.get('supportedLocales')[0]}" + finalJobs.append( + { + "job_title": job_title, + "job_link": job_link, + "company": company, + "country": "Romania", + } + ) + pageNumber += 1 + post_data["pageNumber"] = pageNumber + jobs = scraper.post(url, json.dumps(post_data)).json() - finalJobs.append(job_element) - pageNumber += 1 - scraper = Scraper() - scraper.set_headers(headers) - scraper.get_from_url(url + str(pageNumber), "JSON") - html = scraper.markup[3].get("data") - scraper.__init__(html, "html.parser") - try: - jobs = ( - scraper.find("div", {"class": "view-hcl-ers-career-jobs"}) - .find("tbody") - .find_all("tr") - ) - except AttributeError: - jobs = False + publish_or_update(finalJobs) diff --git a/sites/hm.py b/sites/hm.py index 2531649..c93b2aa 100644 --- a/sites/hm.py +++ b/sites/hm.py @@ -1,24 +1,26 @@ from scraper.Scraper import Scraper from utils import publish_or_update, publish_logo, show_jobs from getCounty import GetCounty, remove_diacritics +import json + +_counties = GetCounty() +data = {"locations": ["cou:ro"], "workAreas": [], + "contractType": [], "fulltext": "", "order_by": "", "page": 1} -_counties = GetCounty() -data = { - "locations": [], - "workAreas": [], - "contractType": [], - "fulltext": "Romania", - "order_by": "relevance", - "page": 1, -} url = "https://career.hm.com/wp-json/hm/v1/sr/jobs/search?_locale=user" +headers = { + "Content-Type": "application/json", + "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.0 Safari/605.1.15" +} + 
scraper = Scraper() -jobs = scraper.post(url, data).json() +scraper.set_headers(headers) +jobs = scraper.post(url, json.dumps(data)).json() company = {"company": "HM"} finalJobs = list() - + while jobs.get("jobs"): for job in jobs.get("jobs"): job_title = job.get("title")