try:
    # python 2.X
    import urllib2
    import urlparse
except ImportError:
    # python 3: expose the relocated modules under their Python 2 names so
    # the rest of the file can use a single spelling.
    import urllib.request as urllib2
    import urllib.parse as urlparse

import sys

from job import Job


def _decode_payload(raw, headers):
    """Return the response body *raw* as a native ``str``.

    On Python 2 ``read()`` already yields ``str`` and the value is returned
    untouched.  On Python 3 ``read()`` yields ``bytes``; calling ``str()`` on
    that would produce the literal repr (``"b'...'"``), so the bytes are
    decoded instead, using the charset announced in *headers* when there is
    one and UTF-8 otherwise.  Undecodable bytes are replaced rather than
    raising, so a slightly mis-labelled page still yields content.
    """
    if isinstance(raw, str):
        return raw
    # Only Python 3 message objects offer get_content_charset(); look it up
    # defensively so a missing/odd headers object just means "use UTF-8".
    get_charset = getattr(headers, "get_content_charset", None)
    charset = (get_charset() if get_charset is not None else None) or "utf-8"
    try:
        return raw.decode(charset, "replace")
    except LookupError:
        # The server announced a codec Python does not know about.
        return raw.decode("utf-8", "replace")


class FetchJob(Job):
    """Sub-class of Job which downloads the content behind a URL."""

    def __init__(self, link_url, parent_url, actions):
        """Create a job for *link_url*, resolved against *parent_url*.

        The two URLs are combined with ``urljoin`` and the result is handed
        to ``Job.__init__`` together with *actions*.  The original values are
        kept on the instance as ``link_url`` and ``parent_url``.
        """
        self.parent_url = parent_url
        self.link_url = link_url
        url = urlparse.urljoin(self.parent_url, self.link_url)
        Job.__init__(self, url, actions)
        self.content = ""
        self.error = ""

    def getUrl(self):
        """Return the job key (presumably the joined URL passed to Job)."""
        return self.getKey()

    def run(self):
        """Download the URL and store its body in ``self.content``.

        Returns True on success.  On any exception the message is stored in
        ``self.error`` and False is returned; the exception is not re-raised.
        """
        try:
            opener = urllib2.build_opener()
            opener.addheaders = [('User-agent', 'Mozilla/5.0')]
            infile = opener.open(self.getUrl())
            try:
                raw = infile.read()
                headers = getattr(infile, "headers", None)
            finally:
                # Always release the underlying connection/file handle, even
                # when read() fails part-way through.
                infile.close()
            self.content = _decode_payload(raw, headers)
            return True
        except Exception:
            # sys.exc_info() is used instead of "except ... as" so the same
            # code is accepted by both old Python 2 and Python 3 parsers.
            _, ex, _ = sys.exc_info()
            self.error = str(ex)
            return False

    def getContent(self):
        """Return the downloaded content ("" until a run succeeds)."""
        return self.content

    def getError(self):
        """Return the error message of the last failed run ("" if none)."""
        return self.error