Skip to content

Commit c22dc3f

Browse files
committed
fix:bio
1 parent 2bbb42c commit c22dc3f

File tree

4 files changed

+45
-25
lines changed

4 files changed

+45
-25
lines changed

examples/upload_staff_to_clay.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
from staffspy import LinkedInAccount
2+
from staffspy.utils.utils import upload_to_clay
3+
4+
# Example: scrape a handful of connections and send them to a Clay webhook.

# Placeholder webhook address -- replace the trailing X's with a real webhook id.
clay_webhook_url = "https://api.clay.com/v3/sources/webhook/pull-in-data-from-a-webhook-XXXXXXXXXXXXXX"

# Session file handed to LinkedInAccount.
session_file = "session.pkl"
account = LinkedInAccount(session_file=session_file, log_level=2)

# Keep the example small: at most three connections, with extra profile data.
connections = account.scrape_connections(extra_profile_data=True, max_results=3)

# One webhook POST per scraped row.
upload_to_clay(webhook_url=clay_webhook_url, data=connections)

staffspy/__init__.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,14 @@
1515
extract_emails_from_text,
1616
clean_df,
1717
)
18-
from staffspy.utils.driver_type import DriverType
18+
from staffspy.utils.driver_type import DriverType, BrowserType
19+
20+
__all__ = [
21+
"LinkedInAccount",
22+
"SolverType",
23+
"DriverType",
24+
"BrowserType",
25+
]
1926

2027

2128
class LinkedInAccount:

staffspy/linkedin/employee_bio.py

Lines changed: 7 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
class EmployeeBioFetcher:
1010
def __init__(self, session):
1111
self.session = session
12-
self.endpoint = "https://www.linkedin.com/voyager/api/graphql?queryId=voyagerIdentityDashProfileComponents.9117695ef207012719e3e0681c667e14&queryName=ProfileComponentsBySectionType&variables=(tabIndex:0,sectionType:languages,profileUrn:urn%3Ali%3Afsd_profile%3A{employee_id},count:50)"
12+
self.endpoint = "https://www.linkedin.com/voyager/api/graphql?queryId=voyagerIdentityDashProfileCards.9ad2590cb61a073ad514922fa752f566&queryName=ProfileTabInitialCards&variables=(count:50,profileUrn:urn%3Ali%3Afsd_profile%3A{employee_id})"
1313

1414
def fetch_employee_bio(self, base_staff):
1515
ep = self.endpoint.format(employee_id=base_staff.id)
@@ -18,36 +18,19 @@ def fetch_employee_bio(self, base_staff):
1818
if res.status_code == 429:
1919
return TooManyRequests("429 Too Many Requests")
2020
if not res.ok:
21-
logger.debug(res.text[:200])
21+
logger.debug(res.text)
2222
return False
2323
try:
24-
res_json = res.json()
24+
data = res.json()
2525
except json.decoder.JSONDecodeError:
26-
logger.debug(res.text[:200])
26+
logger.debug(res.text)
2727
return False
2828

2929
try:
30-
employee_json = list(
31-
filter(
32-
lambda x: ",ABOUT," in x["entityUrn"],
33-
res_json["data"]["identityDashProfileCardsByInitialCards"][
34-
"elements"
35-
],
36-
)
37-
)
30+
base_staff.bio = data["data"]["identityDashProfileCardsByInitialCards"][
31+
"elements"
32+
][3]["topComponents"][1]["components"]["textComponent"]["text"]["text"]
3833
except (KeyError, IndexError, TypeError):
39-
logger.debug(res_json)
4034
return False
4135

42-
self.parse_emp_bio(base_staff, employee_json)
4336
return True
44-
45-
def parse_emp_bio(self, emp, emp_dict):
46-
"""Parse the employee data from the employee profile."""
47-
try:
48-
bio = emp_dict[0]["topComponents"][1]["components"]["textComponent"][
49-
"text"
50-
]["text"]
51-
except:
52-
bio = None
53-
emp.bio = bio

staffspy/utils/utils.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -447,5 +447,23 @@ def clean_df(staff_df):
447447
return staff_df
448448

449449

450+
def upload_to_clay(webhook_url: str, data: pd.DataFrame, timeout: float = 30.0):
    """Post each row of ``data`` to a Clay webhook, one request per row.

    Args:
        webhook_url: URL that every row is POSTed to.
        data: Frame whose rows are converted with ``to_dict("records")`` and
            sent as JSON request bodies.
        timeout: Seconds to wait on each request before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection, which would hang the whole upload loop.

    Returns:
        A list with one ``{"error": ..., "data": ...}`` entry for every row
        whose request raised ``requests.exceptions.RequestException``. The
        list is empty when every row was uploaded; successful uploads are
        not recorded.
    """
    records = data.to_dict("records")
    total = len(records)

    failures = []
    for i, row in enumerate(records, start=1):
        try:
            response = requests.post(
                webhook_url,
                headers={"Accept": "application/json"},
                json=row,
                timeout=timeout,
            )
            # Treat 4xx/5xx replies as failures as well as transport errors.
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            # Best effort: record the failed row and carry on with the rest.
            logger.error(f"Failed to upload row to Clay: {str(e)}")
            failures.append({"error": str(e), "data": row})
        else:
            logger.info(f"Uploaded row to Clay: {i} / {total}")

    return failures
466+
467+
450468
if __name__ == "__main__":
451469
p = parse_dates("May 2018 - Jun 2024")

0 commit comments

Comments
 (0)