Commit 86882346 authored by Ian Dennis Miller

refactor logging, rdf building

parent 2df80aff
Pipeline #282 passed with stage in 15 seconds
......@@ -6,14 +6,6 @@ from twython import Twython, TwythonRateLimitError, TwythonAuthError, TwythonErr
from . import make_uri
import time
import datetime
import logging
logging.basicConfig(
filename="downloader.log",
level=logging.INFO,
format="%(asctime)s:%(levelname)s:%(message)s",
)
class Downloader:
......@@ -47,18 +39,20 @@ class Downloader:
seconds_offset = 10
t = reset_timestamp - now_timestamp + seconds_offset
logging.info('Waiting {0} seconds for Twitter rate limit reset.'.format(t))
self.snapshot.logging.info('Waiting {0} seconds for Twitter rate limit reset.'.format(t))
time.sleep(t)
def get_limits(self):
limits = self.twitter.get_application_rate_limit_status()
logging.debug(json.dumps(limits, indent=True, sort_keys=True))
logging.info("API rate limit remaining")
logging.info("/followers/list: {}".format(limits["resources"]["followers"]["/followers/list"]))
logging.info("/friends/list: {}".format(limits["resources"]["friends"]["/friends/list"]))
self.snapshot.logging.debug(json.dumps(limits, indent=True, sort_keys=True))
self.snapshot.logging.info("API rate limit remaining")
self.snapshot.logging.info("/followers/list: {}".format(
limits["resources"]["followers"]["/followers/list"]))
self.snapshot.logging.info("/friends/list: {}".format(
limits["resources"]["friends"]["/friends/list"]))
def get_user(self, screen_name):
logging.info("Get {} account ".format(screen_name))
self.snapshot.logging.info("Get {} account ".format(screen_name))
success = False
while not success:
......@@ -73,10 +67,10 @@ class Downloader:
self.snapshot.add_account_to_graph(response)
logging.info("OK: get_user")
self.snapshot.logging.info("OK: get_user")
def get_favorites(self, screen_name):
logging.info("Get {} favorites ".format(screen_name))
self.snapshot.logging.info("Get {} favorites ".format(screen_name))
subject = make_uri(screen_name)
......@@ -88,14 +82,14 @@ class Downloader:
count=200,
)
success = True
except TwythonRateLimitError as e:
print(e)
except TwythonRateLimitError:
self.snapshot.logging.warn("Rate limit error; waiting for reset")
success = False
self.wait_for_reset()
except TwythonAuthError as e:
except TwythonAuthError:
success = True
response = []
logging.info("Returned 401; Favorites are private")
self.snapshot.logging.warn("Returned 401; Favorites are private")
for status in response:
self.snapshot.add_favorite_to_graph(subject, status)
......@@ -104,10 +98,10 @@ class Downloader:
status_uri = make_uri(status["id"])
self.snapshot.g.add((subject, self.snapshot.namespace.favorited, status_uri))
logging.info("OK: get_favorites")
self.snapshot.logging.info("OK: get_favorites")
def get_timeline(self, screen_name):
logging.info("Get {} timeline ".format(screen_name))
self.snapshot.logging.info("Get {} timeline ".format(screen_name))
subject = make_uri(screen_name)
......@@ -124,10 +118,10 @@ class Downloader:
except TwythonRateLimitError:
success = False
self.wait_for_reset()
except TwythonAuthError as e:
except TwythonAuthError:
success = True
response = []
logging.info("Returned 401; Timeline is private")
self.snapshot.logging.warn("Returned 401; Timeline is private")
for status in response:
self.snapshot.add_status_to_graph(subject, status)
......@@ -136,7 +130,7 @@ class Downloader:
status_uri = make_uri(status["id"])
self.snapshot.g.add((subject, self.snapshot.namespace.tweeted, status_uri))
logging.info("OK: get_timeline")
self.snapshot.logging.info("OK: get_timeline")
def slow_get(self, screen_name, endpoint, relationship, secondary=False):
subject = make_uri(screen_name)
......@@ -160,9 +154,9 @@ class Downloader:
success = True
next_cursor = False
response = {'users': []}
logging.info("Returned 401; Social net is private")
self.snapshot.logging.warn("Returned 401; Social net is private")
except TwythonError as e:
print(e)
self.snapshot.logging.error(e)
time.sleep(15)
self.reconnect()
time.sleep(15)
......@@ -174,14 +168,14 @@ class Downloader:
self.snapshot.g.add((subject, relationship, acct_uri))
iterations += 1
logging.info("Got page {0}".format(iterations))
self.snapshot.logging.info("Got page {0}".format(iterations))
# in case this is a secondary search, cap the number of pages retrieved
if secondary is True and iterations >= iterator_max:
logging.info("Capped; could get more.")
self.snapshot.logging.info("Capped; could get more.")
break
elif not next_cursor:
logging.info("Retrieved all.")
self.snapshot.logging.info("Retrieved all.")
if iterations == 1:
time.sleep(61)
break
......@@ -189,7 +183,7 @@ class Downloader:
time.sleep(61)
def get_followers(self, screen_name, secondary=False):
logging.info("Get {} followers ".format(screen_name))
self.snapshot.logging.info("Get {} followers ".format(screen_name))
self.slow_get(
screen_name=screen_name,
......@@ -198,10 +192,10 @@ class Downloader:
secondary=secondary
)
logging.info("OK: get_followers")
self.snapshot.logging.info("OK: get_followers")
def get_leaders(self, screen_name, secondary=False):
logging.info("Get {} leaders ".format(screen_name))
self.snapshot.logging.info("Get {} leaders ".format(screen_name))
self.slow_get(
screen_name=screen_name,
......@@ -210,4 +204,4 @@ class Downloader:
secondary=secondary
)
logging.info("OK: get_leaders")
self.snapshot.logging.info("OK: get_leaders")
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment