Skip to content

Commit 1247721

Browse files
committed
refactor completion candidate sorting
Instead of emitting candidates strictly in the order produced by the "for suggestion in suggestions:" loop, retain both the rank implied by the order of the suggestions and the fuzziness of each match reported by find_matches(), then sort on fuzziness, rank, and leading match together. The main benefit is that fuzzy matches can always be demoted to the bottom of the candidate list. The sort key is currently (fuzziness, suggestion-rank, leading-match), but it could be made configurable.
1 parent 7213203 commit 1247721

File tree

3 files changed

+115
-76
lines changed

3 files changed

+115
-76
lines changed

changelog.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ Features
55
--------
66
* "Eager" completions for the `source` command, limited to `*.sql` files.
77
* Suggest column names from all tables in the current database after SELECT (#212)
8+
* Place fuzzy completions at the bottom of the suggestion list more often.
89

910

1011
Bug Fixes

mycli/sqlcompleter.py

Lines changed: 79 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from __future__ import annotations
22

33
from collections import Counter
4+
from enum import IntEnum
45
import logging
56
import re
67
from typing import Any, Collection, Generator, Iterable, Literal
@@ -20,6 +21,14 @@
2021
_logger = logging.getLogger(__name__)
2122

2223

24+
class Fuzziness(IntEnum):
25+
PERFECT = 0
26+
REGEX = 1
27+
UNDER_WORDS = 2
28+
CAMEL_CASE = 3
29+
RAPIDFUZZ = 4
30+
31+
2332
class SQLCompleter(Completer):
2433
favorite_keywords = [
2534
'SELECT',
@@ -956,7 +965,7 @@ def find_matches(
956965
start_only: bool = False,
957966
fuzzy: bool = True,
958967
casing: str | None = None,
959-
) -> Generator[Completion, None, None]:
968+
) -> Generator[tuple[str, int], None, None]:
960969
"""Find completion matches for the given text.
961970
962971
Given the user's input text and a collection of available
@@ -975,10 +984,14 @@ def find_matches(
975984
# unicode support not possible without adding the regex dependency
976985
case_change_pat = re.compile("(?<=[a-z])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])")
977986

978-
completions: list[str] = []
987+
completions: list[tuple[str, int]] = []
988+
989+
def empty_generator():
990+
for item in []:
991+
yield item
979992

980993
if re.match(r'^[\d\.]', text):
981-
return (Completion(x, -len(text)) for x in completions)
994+
return empty_generator()
982995

983996
if fuzzy:
984997
regex = ".{0,3}?".join(map(re.escape, text))
@@ -989,7 +1002,7 @@ def find_matches(
9891002
for item in collection:
9901003
r = pat.search(item.lower())
9911004
if r:
992-
completions.append(item)
1005+
completions.append((item, Fuzziness.REGEX))
9931006
continue
9941007

9951008
under_words_item = [x for x in item.lower().split('_') if x]
@@ -1000,7 +1013,7 @@ def find_matches(
10001013
occurrences += 1
10011014
break
10021015
if occurrences >= len(under_words_text):
1003-
completions.append(item)
1016+
completions.append((item, Fuzziness.UNDER_WORDS))
10041017
continue
10051018

10061019
case_words_item = re.split(case_change_pat, item)
@@ -1011,7 +1024,7 @@ def find_matches(
10111024
occurrences += 1
10121025
break
10131026
if occurrences >= len(case_words_text):
1014-
completions.append(item)
1027+
completions.append((item, Fuzziness.CAMEL_CASE))
10151028
continue
10161029

10171030
if len(text) >= 4:
@@ -1031,31 +1044,25 @@ def find_matches(
10311044
continue
10321045
if item in completions:
10331046
continue
1034-
completions.append(item)
1047+
completions.append((item, Fuzziness.RAPIDFUZZ))
10351048

10361049
else:
10371050
match_end_limit = len(text) if start_only else None
10381051
for item in collection:
10391052
match_point = item.lower().find(text, 0, match_end_limit)
10401053
if match_point >= 0:
1041-
completions.append(item)
1054+
completions.append((item, Fuzziness.PERFECT))
10421055

10431056
if casing == "auto":
10441057
casing = "lower" if last and (last[0].islower() or last[-1].islower()) else "upper"
10451058

1046-
def apply_case(kw: str) -> str:
1059+
def apply_case(tup: tuple[str, int]) -> tuple[str, int]:
1060+
kw, fuzziness = tup
10471061
if casing == "upper":
1048-
return kw.upper()
1049-
return kw.lower()
1050-
1051-
def exact_leading_key(item: str, text: str):
1052-
if text and item.lower().startswith(text):
1053-
return -1000 + len(item)
1054-
return 0
1062+
return (kw.upper(), fuzziness)
1063+
return (kw.lower(), fuzziness)
10551064

1056-
completions = sorted(completions, key=lambda item: exact_leading_key(item, text))
1057-
1058-
return (Completion(x if casing is None else apply_case(x), -len(text)) for x in completions)
1065+
return (x if casing is None else apply_case(x) for x in completions)
10591066

10601067
def get_completions(
10611068
self,
@@ -1064,19 +1071,26 @@ def get_completions(
10641071
smart_completion: bool | None = None,
10651072
) -> Iterable[Completion]:
10661073
word_before_cursor = document.get_word_before_cursor(WORD=True)
1074+
last_for_len = last_word(word_before_cursor, include="most_punctuations")
1075+
text_for_len = last_for_len.lower()
1076+
10671077
if smart_completion is None:
10681078
smart_completion = self.smart_completion
10691079

10701080
# If smart_completion is off then match any word that starts with
10711081
# 'word_before_cursor'.
10721082
if not smart_completion:
1073-
return self.find_matches(word_before_cursor, self.all_completions, start_only=True, fuzzy=False)
1083+
matches = self.find_matches(word_before_cursor, self.all_completions, start_only=True, fuzzy=False)
1084+
return (Completion(x[0], -len(text_for_len)) for x in matches)
10741085

1075-
completions: list[Completion] = []
1086+
completions: list[tuple[str, int, int]] = []
10761087
suggestions = suggest_type(document.text, document.text_before_cursor)
1088+
rigid_sort = False
10771089

1090+
rank = 0
10781091
for suggestion in suggestions:
10791092
_logger.debug("Suggestion type: %r", suggestion["type"])
1093+
rank += 1
10801094

10811095
if suggestion["type"] == "column":
10821096
tables = suggestion["tables"]
@@ -1093,13 +1107,13 @@ def get_completions(
10931107
scoped_cols = sorted(set(scoped_cols), key=lambda s: s.strip('`'))
10941108

10951109
cols = self.find_matches(word_before_cursor, scoped_cols)
1096-
completions.extend(cols)
1110+
completions.extend([(*x, rank) for x in cols])
10971111

10981112
elif suggestion["type"] == "function":
10991113
# suggest user-defined functions using substring matching
11001114
funcs = self.populate_schema_objects(suggestion["schema"], "functions")
11011115
user_funcs = self.find_matches(word_before_cursor, funcs)
1102-
completions.extend(user_funcs)
1116+
completions.extend([(*x, rank) for x in user_funcs])
11031117

11041118
# suggest hardcoded functions using startswith matching only if
11051119
# there is no schema qualifier. If a schema qualifier is
@@ -1109,67 +1123,69 @@ def get_completions(
11091123
predefined_funcs = self.find_matches(
11101124
word_before_cursor, self.functions, start_only=True, fuzzy=False, casing=self.keyword_casing
11111125
)
1112-
completions.extend(predefined_funcs)
1126+
completions.extend([(*x, rank) for x in predefined_funcs])
11131127

11141128
elif suggestion["type"] == "procedure":
11151129
procs = self.populate_schema_objects(suggestion["schema"], "procedures")
11161130
procs_m = self.find_matches(word_before_cursor, procs)
1117-
completions.extend(procs_m)
1131+
completions.extend([(*x, rank) for x in procs_m])
11181132

11191133
elif suggestion["type"] == "table":
11201134
tables = self.populate_schema_objects(suggestion["schema"], "tables")
11211135
tables_m = self.find_matches(word_before_cursor, tables)
1122-
completions.extend(tables_m)
1136+
completions.extend([(*x, rank) for x in tables_m])
11231137

11241138
elif suggestion["type"] == "view":
11251139
views = self.populate_schema_objects(suggestion["schema"], "views")
11261140
views_m = self.find_matches(word_before_cursor, views)
1127-
completions.extend(views_m)
1141+
completions.extend([(*x, rank) for x in views_m])
11281142

11291143
elif suggestion["type"] == "alias":
11301144
aliases = suggestion["aliases"]
11311145
aliases_m = self.find_matches(word_before_cursor, aliases)
1132-
completions.extend(aliases_m)
1146+
completions.extend([(*x, rank) for x in aliases_m])
11331147

11341148
elif suggestion["type"] == "database":
11351149
dbs_m = self.find_matches(word_before_cursor, self.databases)
1136-
completions.extend(dbs_m)
1150+
completions.extend([(*x, rank) for x in dbs_m])
11371151

11381152
elif suggestion["type"] == "keyword":
11391153
keywords_m = self.find_matches(word_before_cursor, self.keywords, casing=self.keyword_casing)
1140-
completions.extend(keywords_m)
1154+
completions.extend([(*x, rank) for x in keywords_m])
11411155

11421156
elif suggestion["type"] == "show":
11431157
show_items_m = self.find_matches(
11441158
word_before_cursor, self.show_items, start_only=False, fuzzy=True, casing=self.keyword_casing
11451159
)
1146-
completions.extend(show_items_m)
1160+
completions.extend([(*x, rank) for x in show_items_m])
11471161

11481162
elif suggestion["type"] == "change":
11491163
change_items_m = self.find_matches(word_before_cursor, self.change_items, start_only=False, fuzzy=True)
1150-
completions.extend(change_items_m)
1164+
completions.extend([(*x, rank) for x in change_items_m])
11511165

11521166
elif suggestion["type"] == "user":
11531167
users_m = self.find_matches(word_before_cursor, self.users, start_only=False, fuzzy=True)
1154-
completions.extend(users_m)
1168+
completions.extend([(*x, rank) for x in users_m])
11551169

11561170
elif suggestion["type"] == "special":
11571171
special_m = self.find_matches(word_before_cursor, self.special_commands, start_only=True, fuzzy=False)
11581172
# specials are special, and go early in the candidates, first if possible
1159-
completions = list(special_m) + completions
1173+
completions.extend([(*x, 0) for x in special_m])
11601174

11611175
elif suggestion["type"] == "favoritequery":
11621176
if hasattr(FavoriteQueries, 'instance') and hasattr(FavoriteQueries.instance, 'list'):
11631177
queries_m = self.find_matches(word_before_cursor, FavoriteQueries.instance.list(), start_only=False, fuzzy=True)
1164-
completions.extend(queries_m)
1178+
completions.extend([(*x, rank) for x in queries_m])
11651179

11661180
elif suggestion["type"] == "table_format":
11671181
formats_m = self.find_matches(word_before_cursor, self.table_formats)
1168-
completions.extend(formats_m)
1182+
completions.extend([(*x, rank) for x in formats_m])
11691183

11701184
elif suggestion["type"] == "file_name":
11711185
file_names_m = self.find_files(word_before_cursor)
1172-
completions.extend(file_names_m)
1186+
completions.extend([(*x, rank) for x in file_names_m])
1187+
# for filenames we _really_ want directories to go last
1188+
rigid_sort = True
11731189
elif suggestion["type"] == "llm":
11741190
if not word_before_cursor:
11751191
tokens = document.text.split()[1:]
@@ -1182,7 +1198,7 @@ def get_completions(
11821198
start_only=False,
11831199
fuzzy=True,
11841200
)
1185-
completions.extend(subcommands_m)
1201+
completions.extend([(*x, rank) for x in subcommands_m])
11861202
elif suggestion["type"] == "enum_value":
11871203
enum_values = self.populate_enum_values(
11881204
suggestion["tables"],
@@ -1191,23 +1207,44 @@ def get_completions(
11911207
)
11921208
if enum_values:
11931209
quoted_values = [self._quote_sql_string(value) for value in enum_values]
1194-
return list(self.find_matches(word_before_cursor, quoted_values))
1210+
completions = [(*x, rank) for x in self.find_matches(word_before_cursor, quoted_values)]
1211+
break
1212+
1213+
def completion_sort_key(item: tuple[str, int, int], text_for_len: str):
1214+
candidate, fuzziness, rank = item
1215+
if not text_for_len:
1216+
# sort only by the rank (the order of the completion type)
1217+
return (0, rank, 0)
1218+
elif candidate.lower().startswith(text_for_len):
1219+
# sort only by the length of the candidate
1220+
return (0, 0, -1000 + len(candidate))
1221+
# sort by fuzziness and rank
1222+
# todo add alpha here, or original order?
1223+
return (fuzziness, rank, 0)
1224+
1225+
if rigid_sort:
1226+
uniq_completions_str = dict.fromkeys(x[0] for x in completions)
1227+
else:
1228+
sorted_completions = sorted(completions, key=lambda item: completion_sort_key(item, text_for_len.lower()))
1229+
uniq_completions_str = dict.fromkeys(x[0] for x in sorted_completions)
11951230

1196-
return completions
1231+
return (Completion(x, -len(text_for_len)) for x in uniq_completions_str)
11971232

1198-
def find_files(self, word: str) -> Generator[Completion, None, None]:
1233+
def find_files(self, word: str) -> Generator[tuple[str, int], None, None]:
11991234
"""Yield matching directory or file names.
12001235
12011236
:param word:
12021237
:return: iterable
12031238
12041239
"""
1240+
# todo position is ignored, but may need to be used
1241+
# todo fuzzy matches for filenames
12051242
base_path, last_path, position = parse_path(word)
12061243
paths = suggest_path(word)
12071244
for name in paths:
12081245
suggestion = complete_path(name, last_path)
12091246
if suggestion:
1210-
yield Completion(suggestion, position)
1247+
yield (suggestion, Fuzziness.PERFECT)
12111248

12121249
def populate_scoped_cols(self, scoped_tbls: list[tuple[str | None, str, str | None]]) -> list[str]:
12131250
"""Find all columns in a set of scoped_tables

0 commit comments

Comments
 (0)