about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Alec Murphy <alec@checksum.fail> 2017-02-23 14:25:42 -0500
committer: Alec Murphy <alec@checksum.fail> 2017-02-23 14:25:42 -0500
commit: 00a26bf2641d927af577ca01ba6895236d466119 (patch)
tree: bf4e282568e2fa9fbb69e24ef0e1957157d8a8d4
parent: 5fe4ab849c9ab86beae46c0a20707025b8316231 (diff)
Fix relative path updating on 301/302.
-rw-r--r-- uriel.py 40
1 files changed, 39 insertions, 1 deletions
diff --git a/uriel.py b/uriel.py
index 6de9736..ca83fed 100644
--- a/uriel.py
+++ b/uriel.py
@@ -70,7 +70,9 @@ def UrielGetPage():
if url_comp.query != '':
post_scheme += '?'+url_comp.query
url = scheme + "://" + post_scheme
- pagedata = subprocess.Popen('wget -q -O - -U "' + Uriel.user_agent + '" "' + url + '" 2>/dev/null', shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE).communicate()[0]
+ pagereq = subprocess.Popen('wget -O - -U "' + Uriel.user_agent + '" "' + url + '"', shell=True, stdin=subprocess.PIPE, stderr=subprocess.PIPE, stdout=subprocess.PIPE).communicate()
+ pagedata = pagereq[0]
+ pagehdrs = pagereq[1].split('\n')
filedata = UrielPreProcess(pagedata, url)
filesize = len(filedata)
if filesize>0:
@@ -97,6 +99,42 @@ def UrielGetPage():
os.write(HGBD,Uriel.download_buffer)
logger.info("[Uriel] copy to download buffer " + url)
else:
+ for p_hdr in pagehdrs:
+ if p_hdr.lower().find('location: ') != -1:
+ if p_hdr.lower().find('[following]') != -1:
+ url_comp = urlparse.urlparse(p_hdr[p_hdr.lower().find('location: ')+10:p_hdr.lower().find('[following]')].strip())
+ scheme = ''
+ netloc = ''
+ path = ''
+ if url_comp.scheme == '':
+ scheme = Uriel.rel.scheme
+ else:
+ scheme = url_comp.scheme
+ Uriel.rel.scheme = url_comp.scheme
+ if url_comp.netloc == '':
+ netloc = Uriel.rel.netloc
+ else:
+ netloc = url_comp.netloc
+ Uriel.rel.netloc = url_comp.netloc
+ if url_comp.path != '':
+ if url_comp.path.find('/') != -1:
+ if url_comp.scheme == '' or url_comp.netloc == '':
+ if url_comp.path[:1] != '/':
+ path = Uriel.rel.path + url_comp.path
+ Uriel.rel.path += url_comp.path[:url_comp.path.rfind('/')+1]
+ else:
+ path = url_comp.path
+ Uriel.rel.path = url_comp.path[:url_comp.path.rfind('/')+1]
+ else:
+ path = url_comp.path
+ Uriel.rel.path = url_comp.path[:url_comp.path.rfind('/')+1]
+ else:
+ path = Uriel.rel.path + url_comp.path
+ post_scheme = netloc + "/" + urllib.quote(path)
+ post_scheme = post_scheme.replace('//','/')
+ if url_comp.query != '':
+ post_scheme += '?'+url_comp.query
+ url = scheme + "://" + post_scheme
Uriel.nav_index += 1
Uriel.history = Uriel.history[0:Uriel.nav_index]
Uriel.history.append({'url':url, 'filedata':filedata})