Skip to content

Commit 4de2a32

Browse files
committed
feat: ks comment api upgrade to v2
1 parent 2517e51 commit 4de2a32

File tree

2 files changed

+76
-49
lines changed

2 files changed

+76
-49
lines changed

media_platform/kuaishou/client.py

Lines changed: 65 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ def __init__(
5454
self.timeout = timeout
5555
self.headers = headers
5656
self._host = "https://www.kuaishou.com/graphql"
57+
self._rest_host = "https://www.kuaishou.com"
5758
self.playwright_page = playwright_page
5859
self.cookie_dict = cookie_dict
5960
self.graphql = KuaiShouGraphQL()
@@ -86,6 +87,29 @@ async def post(self, uri: str, data: dict) -> Dict:
8687
method="POST", url=f"{self._host}{uri}", data=json_str, headers=self.headers
8788
)
8889

90+
async def request_rest_v2(self, uri: str, data: dict) -> Dict:
    """
    Make a REST API V2 POST request (used by the comment endpoints).

    :param uri: API endpoint path, appended to ``self._rest_host``
    :param data: request body; serialized to compact JSON before sending
    :return: the decoded JSON response object
    :raises DataFetchError: if the response body is not a JSON object or
        its ``result`` field is not ``1``
    """
    await self._refresh_proxy_if_expired()

    # Compact separators, non-ASCII characters left unescaped.
    json_str = json.dumps(data, separators=(",", ":"), ensure_ascii=False)
    async with httpx.AsyncClient(proxy=self.proxy) as client:
        response = await client.request(
            method="POST",
            url=f"{self._rest_host}{uri}",
            # A pre-serialized body belongs in ``content=``; handing a str
            # to ``data=`` is deprecated in httpx (``data`` is for form fields).
            content=json_str,
            timeout=self.timeout,
            headers=self.headers,
        )
        try:
            result = response.json()
        except ValueError as exc:
            # json.JSONDecodeError subclasses ValueError; surface it as the
            # same error type this method raises for every other failure.
            raise DataFetchError(
                f"REST API V2 error: non-JSON response "
                f"(status {response.status_code}): {response.text[:200]}"
            ) from exc
        # Guard against a JSON array/scalar before calling ``.get`` on it.
        if not isinstance(result, dict) or result.get("result") != 1:
            raise DataFetchError(f"REST API V2 error: {result}")
        return result
112+
89113
async def pong(self) -> bool:
90114
"""get a note to check if login state is ok"""
91115
utils.logger.info("[KuaiShouClient.pong] Begin pong kuaishou...")
@@ -149,36 +173,32 @@ async def get_video_info(self, photo_id: str) -> Dict:
149173
return await self.post("", post_data)
150174

151175
async def get_video_comments(self, photo_id: str, pcursor: str = "") -> Dict:
    """Fetch one page of first-level comments for a video via REST API V2.

    :param photo_id: id of the video whose comments are requested
    :param pcursor: pagination cursor from the previous page, "" for the first page
    :return: whatever ``request_rest_v2`` returns for the comment-list
        endpoint (callers read ``rootCommentsV2`` and ``pcursorV2`` from it)
    """
    endpoint = "/rest/v/photo/comment/list"
    payload = dict(photoId=photo_id, pcursor=pcursor)
    return await self.request_rest_v2(endpoint, payload)
163186

164187
async def get_video_sub_comments(
    self, photo_id: str, root_comment_id: int, pcursor: str = ""
) -> Dict:
    """Fetch one page of second-level comments for a video via REST API V2.

    :param photo_id: id of the video the comments belong to
    :param root_comment_id: id of the parent (first-level) comment; the V2
        API wants an int, so a digit-only string is converted before sending
    :param pcursor: pagination cursor from the previous page, "" for the first page
    :return: whatever ``request_rest_v2`` returns for the sub-comment
        endpoint (callers read ``subCommentsV2`` and ``pcursorV2`` from it)
    """
    # Ids that arrive as strings (e.g. from the older string-typed commentId)
    # are converted when they are plain ASCII digits; anything else is passed
    # through untouched so the server, not this method, decides its validity.
    if (
        isinstance(root_comment_id, str)
        and root_comment_id.isascii()
        and root_comment_id.isdigit()
    ):
        root_comment_id = int(root_comment_id)

    post_data = {
        "photoId": photo_id,
        "pcursor": pcursor,
        "rootCommentId": root_comment_id,  # Must be int type for V2 API
    }
    return await self.request_rest_v2("/rest/v/photo/comment/sublist", post_data)
182202

183203
async def get_creator_profile(self, userId: str) -> Dict:
184204
post_data = {
@@ -204,22 +224,22 @@ async def get_video_all_comments(
204224
max_count: int = 10,
205225
):
206226
"""
207-
get video all comments include sub comments
208-
:param photo_id:
209-
:param crawl_interval:
210-
:param callback:
211-
:param max_count:
212-
:return:
227+
Get video all comments including sub comments (V2 REST API)
228+
:param photo_id: video id
229+
:param crawl_interval: delay between requests (seconds)
230+
:param callback: callback function for processing comments
231+
:param max_count: max number of comments to fetch
232+
:return: list of all comments
213233
"""
214234

215235
result = []
216236
pcursor = ""
217237

218238
while pcursor != "no_more" and len(result) < max_count:
219239
comments_res = await self.get_video_comments(photo_id, pcursor)
220-
vision_commen_list = comments_res.get("visionCommentList", {})
221-
pcursor = vision_commen_list.get("pcursor", "")
222-
comments = vision_commen_list.get("rootComments", [])
240+
# V2 API returns data at top level, not nested in visionCommentList
241+
pcursor = comments_res.get("pcursorV2", "no_more")
242+
comments = comments_res.get("rootCommentsV2", [])
223243
if len(result) + len(comments) > max_count:
224244
comments = comments[: max_count - len(result)]
225245
if callback: # If there is a callback function, execute the callback function
@@ -240,14 +260,14 @@ async def get_comments_all_sub_comments(
240260
callback: Optional[Callable] = None,
241261
) -> List[Dict]:
242262
"""
243-
Get all second-level comments under specified first-level comments, this method will continue to find all second-level comment information under first-level comments
263+
Get all second-level comments under specified first-level comments (V2 REST API)
244264
Args:
245265
comments: Comment list
246266
photo_id: Video ID
247267
crawl_interval: Delay unit for crawling comments once (seconds)
248268
callback: Callback after one comment crawl ends
249269
Returns:
250-
270+
List of sub comments
251271
"""
252272
if not config.ENABLE_GET_SUB_COMMENTS:
253273
utils.logger.info(
@@ -257,29 +277,30 @@ async def get_comments_all_sub_comments(
257277

258278
result = []
259279
for comment in comments:
260-
sub_comments = comment.get("subComments")
261-
if sub_comments and callback:
262-
await callback(photo_id, sub_comments)
280+
# V2 API uses hasSubComments (boolean) instead of subCommentsPcursor (string)
281+
has_sub_comments = comment.get("hasSubComments", False)
282+
if not has_sub_comments:
283+
continue
263284

264-
sub_comment_pcursor = comment.get("subCommentsPcursor")
265-
if sub_comment_pcursor == "no_more":
285+
# V2 API uses comment_id (int) instead of commentId (string)
286+
root_comment_id = comment.get("comment_id")
287+
if not root_comment_id:
266288
continue
267289

268-
root_comment_id = comment.get("commentId")
269290
sub_comment_pcursor = ""
270291

271292
while sub_comment_pcursor != "no_more":
272293
comments_res = await self.get_video_sub_comments(
273294
photo_id, root_comment_id, sub_comment_pcursor
274295
)
275-
vision_sub_comment_list = comments_res.get("visionSubCommentList", {})
276-
sub_comment_pcursor = vision_sub_comment_list.get("pcursor", "no_more")
296+
# V2 API returns data at top level
297+
sub_comment_pcursor = comments_res.get("pcursorV2", "no_more")
298+
sub_comments = comments_res.get("subCommentsV2", [])
277299

278-
comments = vision_sub_comment_list.get("subComments", {})
279-
if callback:
280-
await callback(photo_id, comments)
300+
if callback and sub_comments:
301+
await callback(photo_id, sub_comments)
281302
await asyncio.sleep(crawl_interval)
282-
result.extend(comments)
303+
result.extend(sub_comments)
283304
return result
284305

285306
async def get_creator_info(self, user_id: str) -> Dict:

store/kuaishou/__init__.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -87,16 +87,22 @@ async def batch_update_ks_video_comments(video_id: str, comments: List[Dict]):
8787

8888

8989
async def update_ks_video_comment(video_id: str, comment_item: Dict):
90-
comment_id = comment_item.get("commentId")
90+
# V2 API uses snake_case field names and comment_id is int type
91+
# Old GraphQL API used camelCase field names
92+
# Support both formats for backward compatibility
93+
comment_id = comment_item.get("comment_id") or comment_item.get("commentId")
9194
save_comment_item = {
92-
"comment_id": comment_id,
95+
"comment_id": str(comment_id) if comment_id else None, # Convert to string for storage
9396
"create_time": comment_item.get("timestamp"),
9497
"video_id": video_id,
9598
"content": comment_item.get("content"),
96-
"user_id": comment_item.get("authorId"),
97-
"nickname": comment_item.get("authorName"),
99+
# V2: author_id, Old: authorId
100+
"user_id": comment_item.get("author_id") or comment_item.get("authorId"),
101+
# V2: author_name, Old: authorName
102+
"nickname": comment_item.get("author_name") or comment_item.get("authorName"),
98103
"avatar": comment_item.get("headurl"),
99-
"sub_comment_count": str(comment_item.get("subCommentCount", 0)),
104+
# V2: commentCount, Old: subCommentCount
105+
"sub_comment_count": str(comment_item.get("commentCount") or comment_item.get("subCommentCount", 0)),
100106
"last_modify_ts": utils.get_current_timestamp(),
101107
}
102108
utils.logger.info(

0 commit comments

Comments
 (0)