Skip to content
Open
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 66 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ Install required Python packages into the virtualenv
```bash
cd $INSTALL_BASE/linkedevents
pip install -r requirements.txt
pip install -r requirements-dev.txt
```
Create the database, like so: (we have only tested on PostgreSQL)
```bash
Expand All @@ -97,6 +98,8 @@ The last steps are needed if you wish to use location, address or event data fro

The commands below are documented in more detail in [linked-events-importers.md](./linked-events-importers.md#linked-events-importers-and-commands).

**Note**: Running the commands below to import all data will take several hours to complete.

```bash
cd $INSTALL_BASE/linkedevents
# Import general Finnish ontology (used by Helsinki UI and Helsinki events)
Expand Down Expand Up @@ -130,6 +133,69 @@ For further erudition, take a look at the DRF documentation on [customizing the

After this, everything but the search endpoint (/search) is working. See [search](#search).

## `local_settings.py`
Create this file from `local_settings.py.template` and edit it to contain
settings specific to your own workstation.

### Example
Assumptions:
* Windows 10 workstation
* Django running on Windows for development
* Database running PostGIS in a Docker container accessible at _localhost:5555_
* Geospatial libraries required by GeoDjango (OSGeo4W) installed into `C:\OSGeo4W64\`

File contents are:
```python
import os

# Development workstation settings.
DEBUG = True

# PostGIS database; in this example it runs in a Docker container that is
# reachable from the host at localhost:5555.
DATABASES = {
    'default': {
        'ENGINE': 'django.contrib.gis.db.backends.postgis',
        'NAME': 'linkedevents',
        'USER': 'linkedevents',
        'PASSWORD': 'linkedevents',
        'HOST': 'localhost',
        'PORT': 5555,
    },
}

# Analyzer mappings for the search backend.
# NOTE(review): presumably consumed by the project's search index setup --
# confirm against the project settings before changing.
CUSTOM_MAPPINGS = {
    'autosuggest': {
        'search_analyzer': 'standard',
        'index_analyzer': 'edgengram_analyzer',
        'analyzer': None,
    },
    'text': {
        'analyzer': 'default',
    },
}

# Locations of the GDAL / GEOS libraries inside the OSGeo4W installation.
GDAL_LIBRARY_PATH = r'C:\OSGeo4W64\bin\gdal204'
GEOS_LIBRARY_PATH = r'C:\OSGeo4W64\bin\geos_c'

# GDAL's data directory, also exported through the process environment.
GDAL_DATA = r'C:\OSGeo4W64\share\epsg_csv'
os.environ["GDAL_DATA"] = GDAL_DATA
```

### GeoDjango

#### Linux
tbd

#### macOS
tbd

#### Windows
1. Go to https://trac.osgeo.org/osgeo4w/
1. Download appropriate installation binaries
1. Install
1. Edit `local_settings.py` to contain `GDAL_LIBRARY_PATH = r'C:\OSGeo4W64\bin\gdal204'`
1. For WFS access over HTTPS to work (for example, the Osoite import), set the environment variable
`CURL_CA_BUNDLE=C:\OSGeo4W64\bin\curl-ca-bundle.crt`
1. Done!


Production notes
----------------

Expand Down
194 changes: 158 additions & 36 deletions events/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,9 @@
import bleach
import django_filters
import pytz
import regex
from django.conf import settings
from django.contrib.postgres.search import TrigramSimilarity
from django.contrib.postgres.search import SearchQuery, TrigramSimilarity
from django.core.cache import caches
from django.core.exceptions import PermissionDenied
from django.db.models import Q, QuerySet
Expand Down Expand Up @@ -369,18 +370,6 @@ def __init__(self, *args, **kwargs):
del self.fields[key]
del self.fields[field_name]

# def get_field(self, model_field):
# kwargs = {}
# if issubclass(
# model_field.__class__,
# (django_db_models.CharField,
# django_db_models.TextField)):
# if model_field.null:
# kwargs['allow_none'] = True
# kwargs['max_length'] = getattr(model_field, 'max_length')
# return fields.CharField(**kwargs)
# return super(TranslatedModelSerializer, self).get_field(model_field)

def to_representation(self, obj):
ret = super(TranslatedModelSerializer, self).to_representation(obj)
if obj is None:
Expand Down Expand Up @@ -1748,32 +1737,154 @@ def parse_duration_string(duration):
return int(val) * mul


def _terms_to_regex(terms, operator, fuzziness=3):
"""
Create a compiled regex from of the rpvided terms of the form
r'(\b(term1){e<2}')|(\b(term2){e<2})" This would match a string
with terms aligned in any order allowing two edits per term.
"""

vals = terms.split(',')
valexprs = [r'(\b' + f'({val}){{e<{fuzziness}}})' for val in vals]
if operator == 'AND':
regex_join = ''
elif operator == 'OR':
regex_join = '|'
expr = f"{regex_join.join(valexprs)}"
return regex.compile(expr, regex.IGNORECASE)


def _filter_event_queryset(queryset, params, srs=None):
"""
Filter events queryset by params
(e.g. self.request.query_params in EventViewSet)
(e.g. self.request.query_params ingit EventViewSet)
"""
# Filter by string (case insensitive). This searches from all fields
# which are marked translatable in translation.py
val = params.get('text', None)

val = params.get('local_ongoing_text', None)
if val:
val = val.lower()
qset = Q()
language = params.get('language', 'fi')
langs = settings.FULLTEXT_SEARCH_LANGUAGES
if language not in langs.keys():
raise ParseError(f"{language} not supported. Supported options are: {' '.join(langs.values())}")

query = SearchQuery(val, config=langs[language], search_type='plain')
kwargs = {f'search_vector_{language}': query}
queryset = queryset.filter(**kwargs).filter(end_time__gte=datetime.utcnow().replace(tzinfo=pytz.utc),
deleted=False,
local=True)
cache = caches['ongoing_events']
val = params.get('local_ongoing_OR', None)
if val:
rc = _terms_to_regex(val, 'OR')
ids = {k for k, v in cache.get('local_ids').items() if rc.search(v, concurrent=True)}
queryset = queryset.filter(id__in=ids)

# Free string search from all translated event fields
event_fields = EventTranslationOptions.fields
for field in event_fields:
# check all languages for each field
qset |= _text_qset_by_translated_field(field, val)
val = params.get('local_ongoing_AND', None)
if val:
rc = _terms_to_regex(val, 'AND')
ids = {k for k, v in cache.get('local_ids').items() if rc.search(v, concurrent=True)}
queryset = queryset.filter(id__in=ids)

# Free string search from all translated place fields
place_fields = PlaceTranslationOptions.fields
for field in place_fields:
location_field = 'location__' + field
# check all languages for each field
qset |= _text_qset_by_translated_field(location_field, val)
val = params.get('internet_ongoing_AND', None)
if val:
rc = _terms_to_regex(val, 'AND')
ids = {k for k, v in cache.get('internet_ids').items() if rc.search(v, concurrent=True)}
queryset = queryset.filter(id__in=ids)

queryset = queryset.filter(qset)
val = params.get('internet_ongoing_OR', None)
if val:
rc = _terms_to_regex(val, 'OR')
ids = {k for k, v in cache.get('internet_ids').items() if rc.search(v, concurrent=True)}
queryset = queryset.filter(id__in=ids)

val = params.get('all_ongoing', None)
if val and validate_bool(val, 'all_ongoing'):
ids = {k for i in cache.get_many(['internet_ids', 'local_ids']).values() for k, v in i.items()}
queryset = queryset.filter(id__in=ids)

val = params.get('all_ongoing_AND', None)
if val:
rc = _terms_to_regex(val, 'AND')
cached_ids = {k: v for i in cache.get_many(['internet_ids', 'local_ids']).values() for k, v in i.items()}
ids = {k for k, v in cached_ids.items() if rc.search(v, concurrent=True)}
queryset = queryset.filter(id__in=ids)

val = params.get('all_ongoing_OR', None)
if val:
rc = _terms_to_regex(val, 'OR')
cached_ids = {k: v for i in cache.get_many(['internet_ids', 'local_ids']).values() for k, v in i.items()}
ids = {k for k, v in cached_ids.items() if rc.search(v, concurrent=True)}
queryset = queryset.filter(id__in=ids)

vals = params.get('keyword_set_AND', None)
if vals:
vals = vals.split(',')
keyword_sets = KeywordSet.objects.filter(id__in=vals)
for keyword_set in keyword_sets:
keywords = keyword_set.keywords.all()
qset = Q(keywords__in=keywords)
queryset = queryset.filter(qset)

vals = params.get('keyword_set_OR', None)
if vals:
vals = vals.split(',')
keyword_sets = KeywordSet.objects.filter(id__in=vals)
all_keywords = set()
for keyword_set in keyword_sets:
keywords = keyword_set.keywords.all()
all_keywords.update(keywords)

if 'local_ongoing_OR_set' in ''.join(params):
count = 1
all_ids = []
while f'local_ongoing_OR_set{count}' in params:
val = params.get(f'local_ongoing_OR_set{count}', None)
if val:
rc = _terms_to_regex(val, 'OR')
all_ids.append({k for k, v in cache.get('local_ids').items() if rc.search(v, concurrent=True)})
count += 1
ids = set.intersection(*all_ids)
queryset = queryset.filter(id__in=ids)

if 'internet_ongoing_OR_set' in ''.join(params):
count = 1
all_ids = []
while f'internet_ongoing_OR_set{count}' in params:
val = params.get(f'internet_ongoing_OR_set{count}', None)
if val:
rc = _terms_to_regex(val, 'OR')
all_ids.append({k for k, v in cache.get('internet_ids').items() if rc.search(v, concurrent=True)})
count += 1
ids = set.intersection(*all_ids)
queryset = queryset.filter(id__in=ids)

if 'all_ongoing_OR_set' in ''.join(params):
count = 1
all_ids = []
while f'all_ongoing_OR_set{count}' in params:
val = params.get(f'all_ongoing_OR_set{count}', None)
if val:
rc = _terms_to_regex(val, 'OR')
cached_ids = {k: v for i in cache.get_many(['internet_ids', 'local_ids']).values() for k, v in i.items()} # noqa E501
all_ids.append({k for k, v in cached_ids.items() if rc.search(v, concurrent=True)})
count += 1
ids = set.intersection(*all_ids)
queryset = queryset.filter(id__in=ids)

if 'keyword_OR_set' in ''.join(params):
rc = regex.compile('keyword_OR_set[0-9]*')
all_sets = rc.findall(''.join(params))
for i in all_sets:
val = params.get(i, None)
if val:
val = val.split(',')
queryset = queryset.filter(keywords__pk__in=val)

val = params.get('internet_based', None)
if val and validate_bool(val, 'internet_based'):
queryset = queryset.filter(location__id__contains='internet')

# Filter by event translated fields and keywords combined. The code is
# repeated as this is the first iteration, which will be replaced by a similarity
Expand Down Expand Up @@ -1807,14 +1918,25 @@ def _filter_event_queryset(queryset, params, srs=None):
qset = Q()
queryset = queryset.filter(*qsets)

# This filtering param requires populate_local_event_cache management command
val = params.get('combined_local_ongoing', None)
val = params.get('text', None)
if val:
cache = caches['ongoing_local']
val = val.lower()
vals = val.split(',')
ids = {k for k, v in cache.get('ids').items() if any(val in v for val in vals)}
queryset = queryset.filter(id__in=ids)
qset = Q()

# Free string search from all translated event fields
event_fields = EventTranslationOptions.fields
for field in event_fields:
# check all languages for each field
qset |= _text_qset_by_translated_field(field, val)

# Free string search from all translated place fields
place_fields = PlaceTranslationOptions.fields
for field in place_fields:
location_field = 'location__' + field
# check all languages for each field
qset |= _text_qset_by_translated_field(location_field, val)

queryset = queryset.filter(qset)

val = params.get('last_modified_since', None)
# This should be in format which dateutil.parser recognizes, e.g.
Expand Down Expand Up @@ -2016,7 +2138,7 @@ def _filter_event_queryset(queryset, params, srs=None):
q = q | Q(in_language__id=lang) | Q(**name_arg) | Q(**desc_arg) | Q(**short_desc_arg)
else:
q = q | Q(in_language__id=lang)
queryset = queryset.filter(q)
queryset = queryset.filter(q).distinct()

# Filter by in_language field only
val = params.get('in_language', None)
Expand Down
Empty file.
Empty file.
4 changes: 4 additions & 0 deletions events/importer/helper/importers/vapaaehtoistyofi/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from .reader import Reader
from .record import Record

__all__ = ['Reader', 'Record']
Loading