Skip to content

Commit 129def0

Browse files
committed
New configuration setting LINKBACKS_IGNORED_URLS_PATTERN
1 parent 40fdc97 commit 129def0

File tree

3 files changed

+12
-2
lines changed

3 files changed

+12
-2
lines changed

.pylintrc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
[MESSAGES CONTROL]
2-
disable = broad-except, missing-docstring, multiple-imports, too-few-public-methods, too-many-arguments, too-many-locals, too-many-positional-arguments
2+
disable = broad-except, missing-docstring, multiple-imports, too-few-public-methods, too-many-arguments, too-many-branches, too-many-locals, too-many-positional-arguments
33

44
[FORMAT]
55
max-line-length = 180

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
1111

1212
### Added
1313

14+
- new configuration setting `LINKBACKS_IGNORED_URLS_PATTERN`: a regular expression matching URLs that should never be considered for linkbacks (_e.g._ `youtube.com`)
1415
- manual execution mode: `python linkbacks.py $pelican_generated_html_file`
1516

1617
### Changed

pelican/plugins/linkbacks/linkbacks.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import json
1010
import logging
1111
import os
12+
import re
1213
import sys
1314
from os import makedirs
1415
from os.path import basename, splitext
@@ -32,6 +33,8 @@
3233
DEFAULT_USER_AGENT = 'pelican-plugin-linkbacks'
3334
DEFAULT_CERT_VERIFY = True
3435
DEFAULT_TIMEOUT = 3
36+
DEFAULT_IGNORED_URLS_PATTERN = 'deviantart.com|youtube.com'
37+
IMAGE_EXTENSIONS = ('.gif', '.jpg', '.pdf', '.png', '.svg')
3538
WEBMENTION_POSS_REL = ('webmention', 'http://webmention.org', 'http://webmention.org/', 'https://webmention.org', 'https://webmention.org/')
3639

3740
LOGGER = logging.getLogger(__name__)
@@ -79,9 +82,12 @@ def process_all_links_of_an_article(config, cache, url, slug, content):
7982
if config.siteurl and link_url.startswith(config.siteurl):
8083
LOGGER.debug("Link url %s skipped because is starts with %s", link_url, config.siteurl)
8184
continue
82-
if splitext(link_url)[1] in ('.gif', '.jpg', '.pdf', '.png', '.svg'):
85+
if splitext(link_url)[1] in IMAGE_EXTENSIONS:
8386
LOGGER.debug("Link url %s skipped because it appears to be an image or PDF file", link_url)
8487
continue
88+
if any(regex.search(link_url) for regex in config.ignored_urls_pattern):
89+
LOGGER.debug("Link url %s skipped because it matches the ignored URLs pattern", link_url)
90+
continue
8591
cache_status = cache.get_status(slug, link_url)
8692
if cache_status:
8793
LOGGER.debug("Link url %s skipped because it is present in cache with status: %s", link_url, cache_status)
@@ -128,6 +134,9 @@ def __init__(self, settings=None):
128134
self.cert_verify = settings.get('LINKBACKS_CERT_VERIFY', DEFAULT_CERT_VERIFY)
129135
self.timeout = settings.get('LINKBACKS_REQUEST_TIMEOUT', DEFAULT_TIMEOUT)
130136
self.user_agent = settings.get('LINKBACKS_USERAGENT', DEFAULT_USER_AGENT)
137+
self.ignored_urls_pattern = settings.get('LINKBACKS_IGNORED_URLS_PATTERN', DEFAULT_IGNORED_URLS_PATTERN)
138+
if self.ignored_urls_pattern and isinstance(self.ignored_urls_pattern, str):
139+
self.ignored_urls_pattern = re.compile(self.ignored_urls_pattern)
131140

132141
class Cache:
133142
def __init__(self, config, data):

0 commit comments

Comments
 (0)