Skip to content

Commit 35eb363

Browse files
committed
enh:connection contact info
1 parent 47c90eb commit 35eb363

File tree

3 files changed

+61
-28
lines changed

3 files changed

+61
-28
lines changed

README.md

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ _why pay $100/mo for LSN when you could do it for free and get a nice csv to go
99
- Scrapes staff from a company on **LinkedIn**
1010
- Obtains skills, experiences, certifications & more
1111
- Or fetch individual users / comments on posts
12-
- Scrape your own LinkedIn connections with details
1312
- Aggregates the employees in a Pandas DataFrame
1413

1514
[Video Guide for StaffSpy](https://youtu.be/DNFmjvpZBTs) - _updated for release v0.2.18_
@@ -62,17 +61,10 @@ companies = account.scrape_companies(
6261
company_names=['openai', 'microsoft']
6362
)
6463

65-
# fetch connections
66-
connections = account.scrape_connections(
67-
extra_profile_data=True,
68-
max_results=50
69-
)
70-
7164
staff.to_csv("staff.csv", index=False)
7265
users.to_csv("users.csv", index=False)
7366
comments.to_csv("comments.csv", index=False)
7467
companies.to_csv("companies.csv", index=False)
75-
connections.to_csv("connections.csv", index=False)
7668
```
7769

7870
#### Browser login
@@ -181,10 +173,11 @@ Optional
181173

182174
```plaintext
183175
├── max_results (int):
184-
| maximum number of connections to fetch (default is all)
176+
| maximum number of connections to fetch (default is very high, i.e. effectively all)
177+
| e.g. 50 to fetch first 50 connections
185178
|
186179
├── extra_profile_data (bool):
187-
| gets all profile info
180+
| fetches education, experiences, skills and certifications for each connection (default is false)
188181
```
189182

190183
### LinkedIn notes
@@ -282,8 +275,16 @@ Staff
282275
│ ├── cert_id
283276
│ └── cert_link
284277
285-
└── Educational Background
286-
├── years
287-
├── school
288-
└── degree
278+
├── Educational Background
279+
| ├── years
280+
| ├── school
281+
| └── degree
282+
283+
└── Connection Info (only when the user is one of your connections and has enabled it on their profile)
284+
├── email_address
285+
├── address
286+
├── birthday
287+
├── websites
288+
├── phone_numbers
289+
└── created_at
289290
```

staffspy/linkedin/linkedin.py

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import requests
1414

1515
import staffspy.utils.utils as utils
16+
from staffspy.linkedin.contact_info import ContactInfoFetcher
1617
from staffspy.utils.exceptions import TooManyRequests, BadCookies, GeoUrnNotFound
1718
from staffspy.linkedin.certifications import CertificationFetcher
1819
from staffspy.linkedin.employee import EmployeeFetcher
@@ -57,6 +58,7 @@ def __init__(self, session: requests.Session):
5758
self.experiences = ExperiencesFetcher(self.session)
5859
self.bio = EmployeeBioFetcher(self.session)
5960
self.languages = LanguagesFetcher(self.session)
61+
self.contact = ContactInfoFetcher(self.session)
6062

6163
def search_companies(self, company_name: str):
6264
"""Get the company id and staff count from the company name."""
@@ -429,26 +431,28 @@ def fetch_all_info_for_employee(self, employee: Staff, index: int):
429431
f"Fetching data for account {employee.id} {index:>4} / {self.num_staff} - {employee.profile_link}"
430432
)
431433

432-
with ThreadPoolExecutor(max_workers=7) as executor:
434+
task_functions = [
435+
(self.employees.fetch_employee, (employee, self.domain), "employee"),
436+
(self.skills.fetch_skills, (employee,), "skills"),
437+
(self.experiences.fetch_experiences, (employee,), "experiences"),
438+
(self.certs.fetch_certifications, (employee,), "certifications"),
439+
(self.schools.fetch_schools, (employee,), "schools"),
440+
(self.bio.fetch_employee_bio, (employee,), "bio"),
441+
(self.languages.fetch_languages, (employee,), "languages"),
442+
]
443+
444+
with ThreadPoolExecutor(max_workers=len(task_functions)) as executor:
433445
tasks = {
434-
executor.submit(
435-
self.employees.fetch_employee, employee, self.domain
436-
): "employee",
437-
executor.submit(self.skills.fetch_skills, employee): "skills",
438-
executor.submit(self.experiences.fetch_experiences, employee): (
439-
"experiences"
440-
),
441-
executor.submit(self.certs.fetch_certifications, employee): (
442-
"certifications"
443-
),
444-
executor.submit(self.schools.fetch_schools, employee): "schools",
445-
executor.submit(self.bio.fetch_employee_bio, employee): "bio",
446-
executor.submit(self.languages.fetch_languages, employee): "lanaguages",
446+
executor.submit(func, *args): name
447+
for func, args, name in task_functions
447448
}
448449

449450
for future in as_completed(tasks):
450451
result = future.result()
451452

453+
if employee.is_connection:
454+
self.contact.fetch_contact_info(employee)
455+
452456
def fetch_user_profile_data_from_public_id(self, user_id: str, key: str):
453457
"""Fetches data given the public LinkedIn user id"""
454458
endpoint = self.public_user_id_ep.format(user_id=user_id)

staffspy/utils/models.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,25 @@ def to_dict(self):
5757
}
5858

5959

60+
class ContactInfo(BaseModel):
    """Optional contact fields for a profile.

    Every field defaults to ``None`` when no value is supplied.
    """

    email_address: str | None = None
    websites: list | None = None
    phone_numbers: list | None = None
    address: str | None = None
    birthday: str | None = None
    created_at: str | None = None

    def to_dict(self):
        """Return the contact fields as a plain dict keyed by field name."""
        # Listed explicitly so the key order of the result stays fixed.
        exported_fields = (
            "email_address",
            "websites",
            "phone_numbers",
            "address",
            "birthday",
            "created_at",
        )
        return {name: getattr(self, name) for name in exported_fields}
77+
78+
6079
class Certification(BaseModel):
6180
title: str | None = None
6281
issuer: str | None = None
@@ -127,6 +146,7 @@ class Staff(BaseModel):
127146
skills: list[Skill] | None = None
128147
experiences: list[Experience] | None = None
129148
certifications: list[Certification] | None = None
149+
contact_info: ContactInfo | None = None
130150
schools: list[School] | None = None
131151
languages: list[str] | None = None
132152

@@ -182,6 +202,8 @@ def to_dict(self):
182202
if len(sorted_experiences) > 0 and sorted_experiences[0].end_date is None
183203
else None
184204
)
205+
206+
contact_info = self.contact_info.to_dict() if self.contact_info else {}
185207
return {
186208
"search_term": self.search_term,
187209
"id": self.id,
@@ -236,6 +258,12 @@ def to_dict(self):
236258
"potential_emails": self.potential_emails,
237259
"profile_photo": self.profile_photo,
238260
"banner_photo": self.banner_photo,
261+
"connection_created_at": contact_info.get("created_at"),
262+
"connection_email": contact_info.get("email_address"),
263+
"connection_phone_numbers": contact_info.get("phone_numbers"),
264+
"connection_websites": contact_info.get("websites"),
265+
"connection_street_address": contact_info.get("address"),
266+
"connection_birthday": contact_info.get("birthday"),
239267
}
240268

241269
def estimate_age_based_on_education(self):

0 commit comments

Comments
 (0)