enh:connection contact info

cullenwatson · cullenwatson · commit 35eb363e8d0d · 2024-12-31T19:02:44.000-06:00
diff --git a/README.md b/README.md
@@ -9,7 +9,6 @@ _why pay $100/mo for LSN when you could do it for free and get a nice csv to go
 - Scrapes staff from a company on **LinkedIn**
 - Obtains skills, experiences, certifications & more
 - Or fetch individuals users / comments on posts
-- Scrape your own LinkedIn connections with details
 - Aggregates the employees in a Pandas DataFrame
 
 [Video Guide for StaffSpy](https://youtu.be/DNFmjvpZBTs) - _updated for release v0.2.18_
@@ -62,17 +61,10 @@ companies = account.scrape_companies(
     company_names=['openai', 'microsoft']
 )
 
-# fetch connections
-connections = account.scrape_connections(
-    extra_profile_data=True,
-    max_results=50
-)
-
 staff.to_csv("staff.csv", index=False)
 users.to_csv("users.csv", index=False)
 comments.to_csv("comments.csv", index=False)
 companies.to_csv("companies.csv", index=False)
-connections.to_csv("connections.csv", index=False)
 ```
 
 #### Browser login
@@ -181,10 +173,11 @@ Optional
 
 ```plaintext
 ├── max_results (int):
-|    maximum number of connections to fetch (default is all)
+|    maximum number of connections to fetch (default is very high)
+|    e.g. 50 to fetch first 50 connections
 |
 ├── extra_profile_data (bool):
-|    gets all profile info
+|    fetches education, experiences, skills, and certifications for each connection (default is False)
 ```
 
 ### LinkedIn notes
@@ -282,8 +275,16 @@ Staff
 │   ├── cert_id
 │   └── cert_link
 │
-└── Educational Background
-    ├── years
-    ├── school
-    └── degree
+├── Educational Background
+│   ├── years
+│   ├── school
+│   └── degree
+│
+└── Connection Info (only present when the user is a connection and has enabled it on their profile)
+    ├── email_address
+    ├── address
+    ├── birthday
+    ├── websites
+    ├── phone_numbers
+    └── created_at
 ```
diff --git a/staffspy/linkedin/linkedin.py b/staffspy/linkedin/linkedin.py
@@ -13,6 +13,7 @@
 import requests
 
 import staffspy.utils.utils as utils
+from staffspy.linkedin.contact_info import ContactInfoFetcher
 from staffspy.utils.exceptions import TooManyRequests, BadCookies, GeoUrnNotFound
 from staffspy.linkedin.certifications import CertificationFetcher
 from staffspy.linkedin.employee import EmployeeFetcher
@@ -57,6 +58,7 @@ def __init__(self, session: requests.Session):
         self.experiences = ExperiencesFetcher(self.session)
         self.bio = EmployeeBioFetcher(self.session)
         self.languages = LanguagesFetcher(self.session)
+        self.contact = ContactInfoFetcher(self.session)
 
     def search_companies(self, company_name: str):
         """Get the company id and staff count from the company name."""
@@ -429,26 +431,28 @@ def fetch_all_info_for_employee(self, employee: Staff, index: int):
             f"Fetching data for account {employee.id} {index:>4} / {self.num_staff} - {employee.profile_link}"
         )
 
-        with ThreadPoolExecutor(max_workers=7) as executor:
+        task_functions = [
+            (self.employees.fetch_employee, (employee, self.domain), "employee"),
+            (self.skills.fetch_skills, (employee,), "skills"),
+            (self.experiences.fetch_experiences, (employee,), "experiences"),
+            (self.certs.fetch_certifications, (employee,), "certifications"),
+            (self.schools.fetch_schools, (employee,), "schools"),
+            (self.bio.fetch_employee_bio, (employee,), "bio"),
+            (self.languages.fetch_languages, (employee,), "languages"),
+        ]
+
+        with ThreadPoolExecutor(max_workers=len(task_functions)) as executor:
             tasks = {
-                executor.submit(
-                    self.employees.fetch_employee, employee, self.domain
-                ): "employee",
-                executor.submit(self.skills.fetch_skills, employee): "skills",
-                executor.submit(self.experiences.fetch_experiences, employee): (
-                    "experiences"
-                ),
-                executor.submit(self.certs.fetch_certifications, employee): (
-                    "certifications"
-                ),
-                executor.submit(self.schools.fetch_schools, employee): "schools",
-                executor.submit(self.bio.fetch_employee_bio, employee): "bio",
-                executor.submit(self.languages.fetch_languages, employee): "lanaguages",
+                executor.submit(func, *args): name
+                for func, args, name in task_functions
             }
 
             for future in as_completed(tasks):
                 result = future.result()
 
+        if employee.is_connection:
+            self.contact.fetch_contact_info(employee)
+
     def fetch_user_profile_data_from_public_id(self, user_id: str, key: str):
         """Fetches data given the public LinkedIn user id"""
         endpoint = self.public_user_id_ep.format(user_id=user_id)
diff --git a/staffspy/utils/models.py b/staffspy/utils/models.py
@@ -57,6 +57,25 @@ def to_dict(self):
         }
 
 
+class ContactInfo(BaseModel):
+    """Contact details of a LinkedIn connection; every field defaults to None."""
+
+    email_address: str | None = None
+    websites: list | None = None
+    phone_numbers: list | None = None
+    address: str | None = None
+    birthday: str | None = None
+    created_at: str | None = None
+
+    def to_dict(self):
+        """Return the six contact fields as a plain dict, in a fixed key order."""
+        exported = (
+            "email_address", "websites", "phone_numbers",
+            "address", "birthday", "created_at",
+        )
+        return {field: getattr(self, field) for field in exported}
+
+
 class Certification(BaseModel):
     title: str | None = None
     issuer: str | None = None
@@ -127,6 +146,7 @@ class Staff(BaseModel):
     skills: list[Skill] | None = None
     experiences: list[Experience] | None = None
     certifications: list[Certification] | None = None
+    contact_info: ContactInfo | None = None
     schools: list[School] | None = None
     languages: list[str] | None = None
 
@@ -182,6 +202,8 @@ def to_dict(self):
             if len(sorted_experiences) > 0 and sorted_experiences[0].end_date is None
             else None
         )
+
+        contact_info = self.contact_info.to_dict() if self.contact_info else {}
         return {
             "search_term": self.search_term,
             "id": self.id,
@@ -236,6 +258,12 @@ def to_dict(self):
             "potential_emails": self.potential_emails,
             "profile_photo": self.profile_photo,
             "banner_photo": self.banner_photo,
+            "connection_created_at": contact_info.get("created_at"),
+            "connection_email": contact_info.get("email_address"),
+            "connection_phone_numbers": contact_info.get("phone_numbers"),
+            "connection_websites": contact_info.get("websites"),
+            "connection_street_address": contact_info.get("address"),
+            "connection_birthday": contact_info.get("birthday"),
         }
 
     def estimate_age_based_on_education(self):