From 581c96892e8a818ce6a421fabbf26c47a8273614 Mon Sep 17 00:00:00 2001 From: Vasily E Date: Sun, 6 Jan 2019 17:02:31 +0300 Subject: [PATCH 1/6] get_hashed_filepath: use os.sep instead of : as method separator : is not an allowed character in a path under Windows. Signed-off-by: Vasily E --- cappy/cappy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cappy/cappy.py b/cappy/cappy.py index 5247347..0b9f067 100755 --- a/cappy/cappy.py +++ b/cappy/cappy.py @@ -67,7 +67,7 @@ def split_path(path): def get_hashed_filepath(stub, method, parsed_url, params): - hash_template = '{method}:{stub}{param_str}' + hash_template = '{method}%s{stub}{param_str}' % os.sep param_str = '' if not stub: stub = 'index.html' From 234ecc6d4471dc85d16a992ef2aa3d54b7fe21a4 Mon Sep 17 00:00:00 2001 From: Vasily E Date: Sun, 6 Jan 2019 17:08:48 +0300 Subject: [PATCH 2/6] get_cache: use `os.path.dirname` to get dir of cache_file Signed-off-by: Vasily E --- cappy/cappy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cappy/cappy.py b/cappy/cappy.py index 0b9f067..01d0902 100755 --- a/cappy/cappy.py +++ b/cappy/cappy.py @@ -113,7 +113,7 @@ def get_cache(self, parsed_url, url, params={}): log("Cache miss") data = self.make_request(url=url, params=params, method=method) # make dirs before you write to file - dirname, _filename = split_path(cache_file) + dirname = os.path.dirname(cache_file) make_dirs(dirname) file_obj = fopen(cache_file, 'wb+') file_obj.writelines(data) From 50f48e902bfd7db0a6a87b9e38b7c764c1bcf060 Mon Sep 17 00:00:00 2001 From: Vasily E Date: Sun, 6 Jan 2019 17:09:30 +0300 Subject: [PATCH 3/6] split_path: correct dirname path separators for Windows too Signed-off-by: Vasily E --- cappy/cappy.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cappy/cappy.py b/cappy/cappy.py index 01d0902..0a47f28 100755 --- a/cappy/cappy.py +++ b/cappy/cappy.py @@ -56,10 +56,10 @@ def split_path(path): last_fragment = split_path[-1] if '.' 
not in last_fragment: filename = '' - dirname = path + dirname = os.sep.join(split_path) else: filename = last_fragment - dirname = '/'.join(split_path[:-1]) + dirname = os.sep.join(split_path[:-1]) else: filename = '' dirname = path From b7bffe77af5fea9c61f1590ee5ace46f92d9af58 Mon Sep 17 00:00:00 2001 From: Vasily E Date: Wed, 9 Jan 2019 11:05:51 +0300 Subject: [PATCH 4/6] split_path: use `os.path.join` instead of `os.sep.join` Signed-off-by: Vasily E --- cappy/cappy.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cappy/cappy.py b/cappy/cappy.py index 0a47f28..6696d5a 100755 --- a/cappy/cappy.py +++ b/cappy/cappy.py @@ -56,10 +56,10 @@ def split_path(path): last_fragment = split_path[-1] if '.' not in last_fragment: filename = '' - dirname = os.sep.join(split_path) + dirname = os.path.join(*split_path) else: filename = last_fragment - dirname = os.sep.join(split_path[:-1]) + dirname = os.path.join(*split_path[:-1]) else: filename = '' dirname = path From f10996acad6a2d9905881fa3180b135a18bc7336 Mon Sep 17 00:00:00 2001 From: Vasily E Date: Wed, 9 Jan 2019 11:24:02 +0300 Subject: [PATCH 5/6] get_hashed_filepath: use MD5 hash for file path Signed-off-by: Vasily E --- cappy/cappy.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cappy/cappy.py b/cappy/cappy.py index 6696d5a..c75a02b 100755 --- a/cappy/cappy.py +++ b/cappy/cappy.py @@ -19,6 +19,7 @@ from requests.adapters import HTTPAdapter from requests.packages.urllib3.util.retry import Retry +from hashlib import md5 def log(*args): message = "".join(args) @@ -67,7 +68,6 @@ def split_path(path): def get_hashed_filepath(stub, method, parsed_url, params): - hash_template = '{method}%s{stub}{param_str}' % os.sep param_str = '' if not stub: stub = 'index.html' @@ -77,7 +77,7 @@ def get_hashed_filepath(stub, method, parsed_url, params): param_str = parsed_url.query if param_str: param_str = '?'+param_str - return hash_template.format(method=method, stub=stub, 
param_str=param_str) + return md5(method + stub + param_str).hexdigest() class CacheHandler(SocketServer.ThreadingMixIn, BaseHTTPServer.BaseHTTPRequestHandler): From f7fd182feb5be3e3d3eff7a5c49d9d1e72f742b9 Mon Sep 17 00:00:00 2001 From: Vasily E Date: Wed, 9 Jan 2019 12:55:22 +0300 Subject: [PATCH 6/6] CacheHandler: handle forbidden characters in directory name Signed-off-by: Vasily E --- cappy/cappy.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/cappy/cappy.py b/cappy/cappy.py index c75a02b..be38e38 100755 --- a/cappy/cappy.py +++ b/cappy/cappy.py @@ -20,6 +20,7 @@ from requests.packages.urllib3.util.retry import Retry from hashlib import md5 +from re import compile def log(*args): message = "".join(args) @@ -80,6 +81,8 @@ def get_hashed_filepath(stub, method, parsed_url, params): return md5(method + stub + param_str).hexdigest() +FORBIDDEN = compile('[<>:"|?*]') + class CacheHandler(SocketServer.ThreadingMixIn, BaseHTTPServer.BaseHTTPRequestHandler): # Based on http://sharebear.co.uk/blog/2009/09/17/very-simple-python-caching-proxy/ def get_cache(self, parsed_url, url, params={}): @@ -89,7 +92,10 @@ def get_cache(self, parsed_url, url, params={}): data = None filepath = get_hashed_filepath(stub=filepath_stub, method=method, parsed_url=parsed_url, params=params) - cache_file = os.path.join(get_cache_dir(CACHE_DIR), dirpath, filepath) + # replace characters forbidden by file system with `_` + clean_dirpath = FORBIDDEN.sub('_', dirpath) + + cache_file = os.path.join(get_cache_dir(CACHE_DIR), clean_dirpath, filepath) hit = False if os.path.exists(cache_file): if CACHE_TIMEOUT == 0: