-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathTweet_crawler.py
More file actions
71 lines (67 loc) · 2.86 KB
/
Tweet_crawler.py
File metadata and controls
71 lines (67 loc) · 2.86 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# Tweet Crawler
# Copyright © 2018 Uday Kiran Kondreddy <udaykiran.kondreddy@gmail.com>
# Copyright © 2018 Naga Teja Mamidapaka <nagatejam@gmail.com>
# Copyright © 2018 Farhaan Ahmed Shaik <farhaanfsk@gmail.com>
# Copyright © 2018 Subhani Kurra <subhanikurra4@gmail.com>
# Copyright © 2018 Yashwant Bezawada <yashwant_b@me.com>
#
# This file is part of Tweet Crawler.
#
# Tweet Crawler is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or (at
# your option) any later version.
#
# Tweet Crawler is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# see <http://www.gnu.org/licenses/>.
#importing required libraries
import twitter
import json
import csv
import re
import time
from urllib.parse import unquote
# Twitter API credentials -- placeholder values, replace before running.
oauth_consumer_key = 'XXXXXXXXXXXXXXX'
oauth_consumer_secret = 'XXXXXXXXXXXXXXXX'
oauth_access_token = 'XXXXXXXXXXXXXXXXXXXXXXX'
oauth_access_token_secret = 'XXXXXXXXXXXXXXXXXXXXXXXXXXXx'

# Build the authenticated API client used by the rest of the script.
twitter_api = twitter.Twitter(
    auth=twitter.oauth.OAuth(
        oauth_access_token,
        oauth_access_token_secret,
        oauth_consumer_key,
        oauth_consumer_secret,
    )
)
# Hashtag (or any search term) to collect tweets for.
query = input("Hashtag:")
# Number of tweets to collect.
max_count = int(input("Count:"))

# First page of results from the Twitter Search API (English tweets only).
search_results = twitter_api.search.tweets(q=query, count=max_count, lang='en')
statuses = search_results['statuses']

# Follow the 'next_results' cursor until we have enough tweets or the API
# stops returning a next page.
while len(statuses) < max_count:
    # BUG FIX: the original tested search_results['search_metadata'] for
    # 'next_results' BEFORE checking that 'search_metadata' existed, so a
    # response without it raised KeyError instead of ending the loop.
    metadata = search_results.get('search_metadata', {})
    if 'next_results' not in metadata:
        break  # no further pages available
    next_results = metadata['next_results']
    # next_results looks like '?max_id=...&q=...'; drop the leading '?' and
    # parse the query string into keyword arguments for the next API call.
    kwargs = dict(kv.split('=') for kv in unquote(next_results[1:]).split('&'))
    try:
        search_results = twitter_api.search.tweets(**kwargs)
    except Exception:
        # Most likely a rate limit: the search API window is 15 minutes,
        # so sleep just past it and retry the same request once.
        time.sleep(910)
        print('Slept, Continuing after exception:')
        search_results = twitter_api.search.tweets(**kwargs)
    statuses += search_results['statuses']
# Write the collected tweets to '<query><unix-timestamp>.csv'.
# newline='' stops the csv module from emitting blank rows on Windows, and
# utf-8 handles emoji / non-ASCII characters common in tweet text.
with open(query + str(int(time.time())) + '.csv', 'w', newline='', encoding='utf-8') as csv_file:
    writer = csv.writer(csv_file)
    writer.writerow(["Tweet"])
    for status in statuses:
        if 'RT' not in status["text"]:  # omitting the retweets
            # Remove hyperlinks and @tags from the tweet text.
            text = re.sub(r"(?:\@|https?\://)\S+", "", status["text"])
            # BUG FIX: write a single column to match the ["Tweet"] header;
            # the original appended a spurious empty second column.
            writer.writerow([text])