Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 26 additions & 34 deletions sites/atkinsrealis.py
Original file line number Diff line number Diff line change
@@ -1,61 +1,53 @@
from scraper.Scraper import Scraper
from utils import publish_or_update, publish_logo, create_job, show_jobs, translate_city
from getCounty import GetCounty
import json
from math import ceil

_counties = GetCounty()
company = "atkinsrealis"
url = "https://careers.atkinsrealis.com/jobs?options=,6477&page="
page = 1
url = "https://slihrms.wd3.myworkdayjobs.com/wday/cxs/slihrms/Careers/jobs"
post_data = {"appliedFacets": {"locations": [
"a19c13ab2cba10a5238f4fb548d3bdf5"]}, "limit": 20, "offset": 0, "searchText": ""}

scraper = Scraper()
scraper.set_headers(
{
{
"content-type": "application/json",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/ 91.0.4472.124 Safari/537.36",
}
)

scraper.get_from_url(url + str(page), verify=False)
obj = scraper.post(url, json.dumps(post_data))

jobs = []
jobs_elements = scraper.find("div", class_="attrax-list-widget__lists").find_all(
"div", class_="attrax-vacancy-tile")
step = 20
total_jobs = obj.json()["total"]
pages = ceil(total_jobs / step)

while True:
for job in jobs_elements:
jobs = []

job_title = job.find(
"a", class_="attrax-vacancy-tile__title").text.strip()
job_link = (
"https://careers.atkinsrealis.com"
+ job.find("a", class_="attrax-vacancy-tile__title")["href"]
)
city = translate_city(
job.find(
"div", class_="attrax-vacancy-tile__option-location-valueset"
).text.strip()
)
for pages in range(0, pages):
if pages > 1:
post_data["offset"] = pages * step
obj = scraper.post(url, json.dumps(post_data))

county = _counties.get_county(city) or []
for job in obj.json()["jobPostings"]:
job_title = job["title"]
job_link = "https://slihrms.wd3.myworkdayjobs.com/en-US/Careers" + \
job["externalPath"]
country = "Romania"
remote = [job.get("remoteType")] if job.get("remoteType") else []

jobs.append(
create_job(
job_title=job_title,
job_link=job_link,
city=city,
county=county,
company=company,
country="Romania",
city="Bucuresti",
county="Bucuresti",
company=company,
remote=remote,
)
)

page += 1
scraper.get_from_url(url + str(page), verify=False)
try:
jobs_elements = scraper.find(
"div", class_="attrax-list-widget__lists"
).find_all("div", class_="attrax-vacancy-tile")
except AttributeError:
break

publish_or_update(jobs)

Expand Down
8 changes: 4 additions & 4 deletions sites/goodyear.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@
import json

company = "GoodYear"
url = " https://goodyear.wd1.myworkdayjobs.com/wday/cxs/goodyear/GoodyearCareers/jobs"
url = "https://goodyear.wd1.myworkdayjobs.com/wday/cxs/goodyear/GoodyearCareers/jobs"

post_data = {"appliedFacets": {"locations": ["013bdadd1adf100168e66388c7390000",
"8e6033e1c034100168b80fcc8d420000"]}, "limit": 20, "offset": 0, "searchText": ""}
post_data = {"appliedFacets": {"locations": ["8e6033e1c034100168b80fcc8d420000",
"013bdadd1adf100168e66388c7390000"]}, "limit": 20, "offset": 0, "searchText": ""}

headers = {"Content-Type": "application/json"}
scraper = Scraper()
Expand All @@ -30,7 +30,7 @@
job_link = "https://goodyear.wd1.myworkdayjobs.com/en-US/GoodyearCareers" + \
job["externalPath"]
country = "Romania"
remote = [job.get("remoteType") if job.get("remoteType") else []]
remote = [job.get("remoteType")] if job.get("remoteType") else []

jobs.append(
create_job(
Expand Down
103 changes: 29 additions & 74 deletions sites/hcltechnologies.py
Original file line number Diff line number Diff line change
@@ -1,98 +1,53 @@
from scraper.Scraper import Scraper
from utils import (
show_jobs,
translate_city,
publish_or_update,
publish_logo,
acurate_city_and_county,
)
from getCounty import GetCounty
import json

_counties = GetCounty()
company = "hcltechnologies"
finalJobs = list()

acurate_city = acurate_city_and_county(Iasi={"city": "Iasi", "county": "Iasi"})

url = "https://www.hcltech.com/views/ajax?_wrapper_format=drupal_ajax&view_name=hcl_ers_career_jobs&view_display_id=block_1&view_args=romania_job&page="
post_data = {"locale": "en_US", "pageNumber": 0, "sortBy": "", "keywords": "", "location": "", "facetFilters": {
}, "brand": "", "skills": [], "categoryId": 9556055, "alertId": "", "rcmCandidateId": ""}

url = "https://careers.hcltech.com/services/recruiting/v1/jobs"
pageNumber = 0
headers = {
"X-Requested-With": "XMLHttpRequest",
"Content-Type": "application/json",
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.0 Safari/605.1.15"
}
scraper = Scraper()
scraper.set_headers(headers)
scraper.get_from_url(url + str(pageNumber), "JSON")

html = scraper.markup[3].get("data")

scraper.__init__(html, "html.parser")

jobs = (
scraper.find("div", {"class": "view-hcl-ers-career-jobs"})
.find("tbody")
.find_all("tr")
)

while jobs:
for job in jobs:
job_title = job.find("td", {"class": "views-field-title"}).text.strip()
job_link = "https://www.hcltech.com" + job.find(
"td", {"class": "views-field-title"}
).find("a").get("href")
location = job.find(
"td", {"class": "views-field-field-job-location"}
).text.strip()

city = []
county = []
remote = []

job_element = {
"job_title": job_title,
"job_link": job_link,
"company": company,
"country": "Romania",
}

if "Remote" in location:
remote.append("Remote")
elif "Hybrid" in location:
remote.append("Hybrid")

if "(Bucharest/Iasi)" in location:
city = ["Bucuresti", "Iasi"]
county = ["Bucuresti", "Iasi"]

else:
city = translate_city(location.split(" ")[0])
if acurate_city.get(city):
city = acurate_city.get(city).get("city")
county = acurate_city.get(city).get("county")
else:
county = _counties.get_county(city)

if not county:
city = []
county = []

job_element.update({"city": city, "county": county, "remote": remote})
jobs = scraper.post(url, json.dumps(post_data)).json()

total_jobs = jobs.get("totalJobs", 0)
pages = (total_jobs //10) + 1

while pageNumber < pages:
for job in jobs.get("jobSearchResult", []):
obj = job.get("response")
job_title = obj.get("unifiedStandardTitle")
job_link = f"https://careers.hcltech.com/job/{obj.get('urlTitle')}/{obj.get('id')}-{obj.get('supportedLocales')[0]}"
finalJobs.append(
{
"job_title": job_title,
"job_link": job_link,
"company": company,
"country": "Romania",
}
)
pageNumber += 1
post_data["pageNumber"] = pageNumber
jobs = scraper.post(url, json.dumps(post_data)).json()

finalJobs.append(job_element)

pageNumber += 1
scraper = Scraper()
scraper.set_headers(headers)
scraper.get_from_url(url + str(pageNumber), "JSON")
html = scraper.markup[3].get("data")
scraper.__init__(html, "html.parser")
try:
jobs = (
scraper.find("div", {"class": "view-hcl-ers-career-jobs"})
.find("tbody")
.find_all("tr")
)
except AttributeError:
jobs = False


publish_or_update(finalJobs)

Expand Down
24 changes: 13 additions & 11 deletions sites/hm.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,26 @@
from scraper.Scraper import Scraper
from utils import publish_or_update, publish_logo, show_jobs
from getCounty import GetCounty, remove_diacritics
import json

_counties = GetCounty()
data = {"locations": ["cou:ro"], "workAreas": [],
"contractType": [], "fulltext": "", "order_by": "", "page": 1}

_counties = GetCounty()
data = {
"locations": [],
"workAreas": [],
"contractType": [],
"fulltext": "Romania",
"order_by": "relevance",
"page": 1,
}
url = "https://career.hm.com/wp-json/hm/v1/sr/jobs/search?_locale=user"

headers = {
"Content-Type": "application/json",
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.0 Safari/605.1.15"
}

scraper = Scraper()
jobs = scraper.post(url, data).json()
scraper.set_headers(headers)
jobs = scraper.post(url, json.dumps(data)).json()

company = {"company": "HM"}
finalJobs = list()

z
while jobs.get("jobs"):
for job in jobs.get("jobs"):
job_title = job.get("title")
Expand Down