Source code for eprc.scheduler
import itertools
import logging
import urllib2
import utils
class Scheduler(object):
    """Work queue of (package name, extra) pairs whose requirements get crawled.

    State kept on the instance:

    * ``todo``      -- set of ``(name, extra)`` tuples still to be processed.
    * ``done``      -- set of ``(name, extra)`` tuples already processed.
    * ``blacklist`` -- set of ``(name, version)`` tuples that failed extraction
      and are skipped afterwards.

    All tuple members are passed through ``utils.normalize`` before they are
    stored, so lookups must normalize the same way.
    """

    def __init__(self, db, extractor, pypi, verbosity=1):
        """Create an empty scheduler.

        :param db: metadata store; this class calls ``db.get(name, version)``
            and ``db.all_versions(name)``.
        :param extractor: object providing ``from_native(db, name)`` and
            ``from_pypi(db, name, version)``.
        :param pypi: PyPI client providing ``real_name(name)`` and
            ``package_releases(name)``.
        :param verbosity: a status line is logged once every ``verbosity``
            calls to :meth:`get`.
        """
        self.db = db
        self.extractor = extractor
        self.pypi = pypi
        self.done = set()
        self.todo = set()
        self.blacklist = set()
        self.report_counter = 0
        self.verbosity = verbosity

    def __str__(self):
        """Return a one-line summary with the sizes of the three sets."""
        return "Scheduler done={} todo={} blacklisted={}".format(
            len(self.done),
            len(self.todo),
            len(self.blacklist)
        )

    def get(self):
        """Pop and return the next unprocessed ``(name, extra)`` entry.

        Entries that are already in ``done`` are discarded on the way.
        Returns ``None`` when ``todo`` runs empty.
        """
        entry = None
        while self.todo and not entry:
            candidate = self.todo.pop()
            if candidate not in self.done:
                entry = candidate
        # Throttle progress logging: one status line per `verbosity` calls.
        self.report_counter += 1
        if self.report_counter >= self.verbosity:
            self.report_counter = 0
            logging.info(str(self))
        return entry

    def _queue_requirement(self, pkg):
        """Queue one requirement record, bare and once per requested extra.

        ``pkg`` is a mapping with a ``'name'`` and an iterable ``'extras'``.
        """
        # The leading '' makes sure the plain package (no extra) is queued too.
        for extra_wish in itertools.chain([''], pkg['extras']):
            candidate = (
                utils.normalize(pkg['name']),
                utils.normalize(extra_wish),
            )
            if candidate not in self.done:
                self.todo.add(candidate)

    def add_todos_from_db(self, name, version, extra=''):
        """Queue every requirement recorded in the db for ``name``/``version``.

        The setup, install and test requirements are always queued; the
        requirements listed under ``extras_require[extra]`` are queued in
        addition when ``extra`` is non-empty.

        If the db holds no record for ``name``/``version`` a warning is logged
        and nothing is queued.
        """
        data = self.db.get(name, version)
        if not data:
            # db.get() may come back empty (process_extract checks for that as
            # well); subscripting it would abort the whole run.
            logging.warning("No data found for {}:{}".format(name, version))
            return
        # always add the defaults (without extras)
        for pkg in itertools.chain(
                data['setup_requires'],
                data['install_requires'],
                data['tests_require']):
            self._queue_requirement(pkg)
        if extra:
            for pkg in data['extras_require'].get(extra, []):
                self._queue_requirement(pkg)

    def done_with_all_versions(self, name, extra):
        """Record ``(name, extra)`` as processed."""
        self.done.add((utils.normalize(name), utils.normalize(extra)))

    def blacklist_version(self, name, version):
        """Record ``(name, version)`` as not extractable."""
        self.blacklist.add((utils.normalize(name), utils.normalize(version)))

    def is_version_blacklisted(self, name, version):
        """Return True if ``(name, version)`` was blacklisted earlier."""
        return (utils.normalize(name), utils.normalize(version)) in self.blacklist

    def process_cached(self, name, extra):
        """Queue requirements of every version of ``name`` known to the db.

        ``(name, extra)`` is marked as done afterwards, also when the db knows
        no version at all (a warning is logged in that case).
        """
        all_versions = self.db.all_versions(name)
        if not all_versions:
            logging.warning("No versions found for {}".format(name))
        for version in all_versions:
            self.add_todos_from_db(name, version, extra)
        self.done_with_all_versions(name, extra)

    def _fetch_version(self, name, version):
        """Run the PyPI extractor for one release; blacklist it on failure.

        A release is blacklisted when the extractor returns nothing useful or
        raises. Exceptions are logged and swallowed on purpose so that one
        broken release does not stop the crawl.
        """
        try:
            logging.info(
                "Fetching {}:{}".format(
                    utils.normalize(name),
                    utils.normalize(version)
                )
            )
            data = self.extractor.from_pypi(self.db, name, version)
            # did we get something useful?
            if not data:
                self.blacklist_version(name, version)
        except Exception as e:
            logging.warning(
                "Unhandled exception while processing {}:{} - {}".format(
                    name,
                    version,
                    e
                )
            )
            self.blacklist_version(name, version)

    def process_extract(self, name, extra):
        """Extract metadata for all PyPI releases of ``name`` and queue deps.

        Releases already in the db or on the blacklist are not fetched again.
        ``(name, extra)`` is marked as done once all releases were visited.

        NOTE(review): both early returns below leave ``(name, extra)`` out of
        ``done``, so the pair can be queued again by a later dependent --
        confirm that this retry behaviour is intended.
        """
        native_result = self.extractor.from_native(self.db, name)
        try:
            # From here on `name` is whatever spelling PyPI reports.
            name = self.pypi.real_name(name)
        except urllib2.HTTPError:
            logging.warning("PyPi error for {}".format(name))
            return
        versions = self.pypi.package_releases(name)
        if not versions and not native_result:
            logging.warning("No versions found for {}".format(name))
            return
        for version in versions:
            data = self.db.get(name, version)
            if data:
                logging.info(
                    "Cached {}:{}".format(
                        utils.normalize(name),
                        utils.normalize(version)
                    )
                )
            elif self.is_version_blacklisted(name, version):
                logging.info("Blacklisted {}:{}".format(name, version))
            else:
                self._fetch_version(name, version)
            # register: re-read the record, the extractor may just have
            # stored it
            data = self.db.get(name, version)
            if data:
                self.add_todos_from_db(data['name'], data['version'], extra)
        self.done_with_all_versions(name, extra)