|
9 | 9 | import json |
10 | 10 | import logging |
11 | 11 | import os |
| 12 | +import re |
12 | 13 | import sys |
13 | 14 | from os import makedirs |
14 | 15 | from os.path import basename, splitext |
|
32 | 33 | DEFAULT_USER_AGENT = 'pelican-plugin-linkbacks' |
33 | 34 | DEFAULT_CERT_VERIFY = True |
34 | 35 | DEFAULT_TIMEOUT = 3 |
| 36 | +DEFAULT_IGNORED_URLS_PATTERN = 'deviantart.com|youtube.com' |
| 37 | +IMAGE_EXTENSIONS = ('.gif', '.jpg', '.pdf', '.png', '.svg') |
35 | 38 | WEBMENTION_POSS_REL = ('webmention', 'http://webmention.org', 'http://webmention.org/', 'https://webmention.org', 'https://webmention.org/') |
36 | 39 |
|
37 | 40 | LOGGER = logging.getLogger(__name__) |
@@ -79,9 +82,12 @@ def process_all_links_of_an_article(config, cache, url, slug, content): |
79 | 82 | if config.siteurl and link_url.startswith(config.siteurl): |
80 | 83 | LOGGER.debug("Link url %s skipped because is starts with %s", link_url, config.siteurl) |
81 | 84 | continue |
82 | | - if splitext(link_url)[1] in ('.gif', '.jpg', '.pdf', '.png', '.svg'): |
| 85 | + if splitext(link_url)[1] in IMAGE_EXTENSIONS: |
83 | 86 | LOGGER.debug("Link url %s skipped because it appears to be an image or PDF file", link_url) |
84 | 87 | continue |
| 88 | + if config.ignored_urls_pattern and config.ignored_urls_pattern.search(link_url):  # the config holds ONE compiled regex (or a falsy value when disabled), not an iterable of regexes |
| 89 | + LOGGER.debug("Link url %s skipped because it matches the ignored URLs pattern", link_url) |
| 90 | + continue |
85 | 91 | cache_status = cache.get_status(slug, link_url) |
86 | 92 | if cache_status: |
87 | 93 | LOGGER.debug("Link url %s skipped because it is present in cache with status: %s", link_url, cache_status) |
@@ -128,6 +134,9 @@ def __init__(self, settings=None): |
128 | 134 | self.cert_verify = settings.get('LINKBACKS_CERT_VERIFY', DEFAULT_CERT_VERIFY) |
129 | 135 | self.timeout = settings.get('LINKBACKS_REQUEST_TIMEOUT', DEFAULT_TIMEOUT) |
130 | 136 | self.user_agent = settings.get('LINKBACKS_USERAGENT', DEFAULT_USER_AGENT) |
| 137 | + self.ignored_urls_pattern = settings.get('LINKBACKS_IGNORED_URLS_PATTERN', DEFAULT_IGNORED_URLS_PATTERN) |
| 138 | + if self.ignored_urls_pattern and isinstance(self.ignored_urls_pattern, str): |
| 139 | + self.ignored_urls_pattern = re.compile(self.ignored_urls_pattern) |
131 | 140 |
|
132 | 141 | class Cache: |
133 | 142 | def __init__(self, config, data): |
|
0 commit comments