gui: Improve fuzzy-select heuristics

Even though the code already used non-greedy wildcards before,
it would not find the shortest match: a regex search always returns
the leftmost match, so earlier match starts would still take precedence.

This could possibly be sped up a bit in CPython by doing
everything inside re using lookahead-assertion trickery, but the
current code is already imperceptibly fast for hundreds of
choices.
This commit is contained in:
David Nadlinger 2020-08-14 02:04:28 +01:00
parent a46573e97a
commit 69718fca90
1 changed file with 32 additions and 10 deletions

View File

@@ -32,7 +32,6 @@ class FuzzySelectWidget(LayoutWidget):
:param entry_count_limit: Maximum number of entries to show. :param entry_count_limit: Maximum number of entries to show.
""" """
super().__init__(*args) super().__init__(*args)
self.choices = choices
self.entry_count_limit = entry_count_limit self.entry_count_limit = entry_count_limit
assert entry_count_limit >= 2, ("Need to allow at least two entries " + assert entry_count_limit >= 2, ("Need to allow at least two entries " +
"to show the '<n> not shown' hint") "to show the '<n> not shown' hint")
@@ -58,9 +57,12 @@ class FuzzySelectWidget(LayoutWidget):
self.abort_when_menu_hidden = False self.abort_when_menu_hidden = False
self.abort_when_line_edit_unfocussed = True self.abort_when_line_edit_unfocussed = True
self.set_choices(choices)
def set_choices(self, choices: List[Tuple[str, int]]) -> None: def set_choices(self, choices: List[Tuple[str, int]]) -> None:
"""Update the list of choices available to the user.""" """Update the list of choices available to the user."""
self.choices = choices # Keep sorted in the right order for when the query is empty.
self.choices = sorted(choices, key=lambda a: (a[1], a[0]))
if self.menu: if self.menu:
self._update_menu() self._update_menu()
@@ -173,19 +175,39 @@ class FuzzySelectWidget(LayoutWidget):
without interruptions), then the position of the match, and finally without interruptions), then the position of the match, and finally
lexicographically. lexicographically.
""" """
query = self.line_edit.text()
if not query:
return [label for label, _ in self.choices]
# Find all "substring" matches of the given query in the labels,
# allowing any number of characters between each query character.
# Sort first by length of match (short matches preferred), to which the
# set weight is also applied, then by location (early in the label
# preferred), and at last alphabetically.
# TODO: More SublimeText-like heuristics taking capital letters and # TODO: More SublimeText-like heuristics taking capital letters and
# punctuation into account. Also, requiring the matches to be in order # punctuation into account. Also, requiring the matches to be in order
# seems to be a bit annoying in practice. # seems to be a bit annoying in practice.
text = self.line_edit.text()
# `re` seems to be the fastest way of doing this in CPython, even with
# all the (non-greedy) wildcards.
suggestions = [] suggestions = []
# `re` seems to be the fastest way of matching this in CPython, even pattern_str = ".*?".join(map(re.escape, query))
# with all the wildcards. pattern = re.compile(pattern_str, flags=re.IGNORECASE)
pat = '.*?'.join(map(re.escape, text.lower()))
regex = re.compile(pat)
for label, weight in self.choices: for label, weight in self.choices:
r = regex.search(label.lower()) matches = []
if r: # Manually loop over shortest matches at each position;
suggestions.append((len(r.group()) - weight, r.start(), label)) # re.finditer() only returns non-overlapping matches.
pos = 0
while True:
r = pattern.search(label, pos=pos)
if not r:
break
start, stop = r.span()
matches.append((stop - start - weight, start, label))
pos = start + 1
if matches:
suggestions.append(min(matches))
return [x for _, _, x in sorted(suggestions)] return [x for _, _, x in sorted(suggestions)]
def _close(self): def _close(self):