913 lines
36 KiB
Diff
913 lines
36 KiB
Diff
From 312c215e717654e55fa48ec968f412201d2a5544 Mon Sep 17 00:00:00 2001
|
||
From: Harald Sitter <sitter@kde.org>
|
||
Date: Mon, 14 Jul 2025 17:28:14 +0200
|
||
Subject: [PATCH] servicerunner: fuzzy match
|
||
|
||
use a bitap implementation instead of doing awkward contains dances.
|
||
this should lead to somewhat more reliable results, which are now more
|
||
comprehensively asserted in the unit test
|
||
|
||
at the heart of this is a new fuzzyScore function that assigns a score
|
||
to a service vis a vis a query. this score is adjusted depending on
|
||
which field it is regarding (name > genericname > keywords).
|
||
this should hopefully ensure that a match against name outweighs most
|
||
other matches. all scores are eventually assembled into a final score
|
||
that gets used as match relevance
|
||
---
|
||
runners/services/autotests/CMakeLists.txt | 3 +
|
||
runners/services/autotests/bitaptest.cpp | 70 +++++
|
||
.../autotests/fixtures/audacity.desktop | 2 +-
|
||
.../fixtures/org.kde.discover.desktop | 17 ++
|
||
.../autotests/fixtures/org.kde.kpat.desktop | 2 +-
|
||
.../services/autotests/servicerunnertest.cpp | 94 ++++--
|
||
runners/services/bitap.h | 178 +++++++++++
|
||
runners/services/levenshtein.h | 58 ++++
|
||
runners/services/servicerunner.cpp | 286 +++++++++++-------
|
||
9 files changed, 576 insertions(+), 134 deletions(-)
|
||
create mode 100644 runners/services/autotests/bitaptest.cpp
|
||
create mode 100755 runners/services/autotests/fixtures/org.kde.discover.desktop
|
||
create mode 100644 runners/services/bitap.h
|
||
create mode 100644 runners/services/levenshtein.h
|
||
|
||
diff --git a/runners/services/autotests/CMakeLists.txt b/runners/services/autotests/CMakeLists.txt
|
||
index 04849a2928..ff7ec66634 100644
|
||
--- a/runners/services/autotests/CMakeLists.txt
|
||
+++ b/runners/services/autotests/CMakeLists.txt
|
||
@@ -6,3 +6,6 @@ remove_definitions(-DQT_NO_CAST_FROM_ASCII)
|
||
ecm_add_test(servicerunnertest.cpp TEST_NAME servicerunnertest
|
||
LINK_LIBRARIES Qt::Test KF6::Service KF6::Runner)
|
||
krunner_configure_test(servicerunnertest krunner_services)
|
||
+
|
||
+ecm_add_test(bitaptest.cpp TEST_NAME bitaptest
|
||
+ LINK_LIBRARIES Qt::Test)
|
||
diff --git a/runners/services/autotests/bitaptest.cpp b/runners/services/autotests/bitaptest.cpp
|
||
new file mode 100644
|
||
index 0000000000..1a1cb856ec
|
||
--- /dev/null
|
||
+++ b/runners/services/autotests/bitaptest.cpp
|
||
@@ -0,0 +1,70 @@
|
||
+// SPDX-License-Identifier: LGPL-2.1-only OR LGPL-3.0-only OR LicenseRef-KDE-Accepted-LGPL
|
||
+// SPDX-FileCopyrightText: 2025 Harald Sitter <sitter@kde.org>
|
||
+
|
||
+#include <QDebug>
|
||
+#include <QDir>
|
||
+#include <QFile>
|
||
+#include <QObject>
|
||
+#include <QStandardPaths>
|
||
+#include <QTest>
|
||
+#include <QThread>
|
||
+
|
||
+#include "../bitap.h"
|
||
+
|
||
+class BitapTest : public QObject
|
||
+{
|
||
+ Q_OBJECT
|
||
+private Q_SLOTS:
|
||
+ void initTestCase()
|
||
+ {
|
||
+ }
|
||
+ void cleanupTestCase()
|
||
+ {
|
||
+ }
|
||
+
|
||
+ void testBitap()
|
||
+ {
|
||
+ using namespace Bitap;
|
||
+ // The macro has trouble with designated initializers, so we wrap them in ().
|
||
+ QCOMPARE(bitap(u"hello world", u"hello", 1), (Match{.end = 4, .distance = 0}));
|
||
+ QCOMPARE(bitap(u"wireshark", u"di", 1), (Match{.end = 1, .distance = 1}));
|
||
+ QCOMPARE(bitap(u"discover", u"disk", 1), (Match{.end = 2, .distance = 1}));
|
||
+ QCOMPARE(bitap(u"discover", u"disc", 1), (Match{.end = 3, .distance = 0}));
|
||
+ QCOMPARE(bitap(u"discover", u"scov", 1), (Match{.end = 5, .distance = 0}));
|
||
+ QCOMPARE(bitap(u"discover", u"diki", 1), std::nullopt);
|
||
+ QCOMPARE(bitap(u"discover", u"obo", 1), std::nullopt);
|
||
+ // With a hamming distance of 1 this may match because it is a single transposition.
|
||
+ QCOMPARE(bitap(u"discover", u"dicsover", 1), (Match{.end = 7, .distance = 1}));
|
||
+ // … but with three characters out of place things should not match.
|
||
+ QCOMPARE(bitap(u"discover", u"dicosver", 1), std::nullopt);
|
||
+ // pattern too long
|
||
+ QCOMPARE(bitap(u"discover", u" !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~", 1), std::nullopt);
|
||
+ // This is not a transposition as per Damerau–Levenshtein distance because the characters are not adjacent.
|
||
+ QCOMPARE(bitap(u"steam", u"skeap", 1), std::nullopt);
|
||
+ // Deletion required
|
||
+ QCOMPARE(bitap(u"discover", u"discover", 1), (Match{.end = 7, .distance = 0}));
|
||
+ QCOMPARE(bitap(u"discover", u"discovery", 1), (Match{.end = 7, .distance = 1}));
|
||
+ // Insertion required
|
||
+ QCOMPARE(bitap(u"discover", u"dicover", 1), (Match{.end = 7, .distance = 1}));
|
||
+ }
|
||
+
|
||
+ void testScore()
|
||
+ {
|
||
+ using namespace Bitap;
|
||
+ // aperfectten has 11 characters, i.e. indexes 0 through 10. The maximum end is therefore 10.
|
||
+ QCOMPARE(score(u"aperfectten", Match{.end = 10, .distance = 0}, 1), 1.0);
|
||
+ QCOMPARE(score(u"aperfectten", Match{.end = 4, .distance = 0}, 1), 0.4);
|
||
+ QCOMPARE(score(u"aperfectten", Match{.end = 4, .distance = 1}, 1), 0.35);
|
||
+ QCOMPARE(score(u"aperfectten", Match{.end = 0, .distance = 0}, 0), 0);
|
||
+ QCOMPARE(score(u"aperfectten", Match{.end = 0, .distance = 0}, 1), 0);
|
||
+ QCOMPARE(score(u"aperfectten", Match{.end = 1, .distance = 1}, 1), 0.05);
|
||
+
|
||
+ QCOMPARE(score(u"abc", Match{.end = 2, .distance = 1}, 1), 0.95);
|
||
+ // Ask for distance 0 but it has a distance so this is a super bad match.
|
||
+ QCOMPARE(score(u"abc", Match{.end = 2, .distance = 1}, 0), 0);
|
||
+ }
|
||
+};
|
||
+
|
||
+QTEST_MAIN(BitapTest)
|
||
+
|
||
+#include "bitaptest.moc"
|
||
diff --git a/runners/services/autotests/fixtures/audacity.desktop b/runners/services/autotests/fixtures/audacity.desktop
|
||
index 7613d9f32f..05e1b9d929 100644
|
||
--- a/runners/services/autotests/fixtures/audacity.desktop
|
||
+++ b/runners/services/autotests/fixtures/audacity.desktop
|
||
@@ -1,5 +1,5 @@
|
||
[Desktop Entry]
|
||
-Name=Audacity
|
||
+Name=Audacity ServiceRunnerTest
|
||
GenericName=Sound Editor
|
||
Comment=Record and edit audio files
|
||
Keywords=audio;sound;alsa;jack;editor;
|
||
diff --git a/runners/services/autotests/fixtures/org.kde.discover.desktop b/runners/services/autotests/fixtures/org.kde.discover.desktop
|
||
new file mode 100755
|
||
index 0000000000..978b2b4152
|
||
--- /dev/null
|
||
+++ b/runners/services/autotests/fixtures/org.kde.discover.desktop
|
||
@@ -0,0 +1,17 @@
|
||
+# SPDX-FileCopyrightText: None
|
||
+# SPDX-License-Identifier: CC0-1.0
|
||
+[Desktop Entry]
|
||
+Name=Discover ServiceRunnerTest
|
||
+Comment=Install and remove apps and add-ons
|
||
+MimeType=application/vnd.flatpak;application/vnd.flatpak.repo;application/vnd.flatpak.ref;
|
||
+Exec=plasma-discover %F
|
||
+Icon=plasmadiscover
|
||
+Type=Application
|
||
+X-DocPath=plasma-discover/index.html
|
||
+InitialPreference=5
|
||
+NoDisplay=false
|
||
+Actions=Updates;
|
||
+SingleMainWindow=true
|
||
+GenericName=Software Center
|
||
+Categories=Qt;KDE;System;
|
||
+Keywords=program;software;store;repository;package;add;install;uninstall;remove;update;apps;applications;games;flatpak;snap;addons;add-ons;firmware;
|
||
diff --git a/runners/services/autotests/fixtures/org.kde.kpat.desktop b/runners/services/autotests/fixtures/org.kde.kpat.desktop
|
||
index 71d7fd2a89..3a91d89afe 100644
|
||
--- a/runners/services/autotests/fixtures/org.kde.kpat.desktop
|
||
+++ b/runners/services/autotests/fixtures/org.kde.kpat.desktop
|
||
@@ -1,7 +1,7 @@
|
||
# SPDX-FileCopyrightText: 2022 Alexander Lohnau <alexander.lohnau@gmx.de>
|
||
# SPDX-License-Identifier: CC0-1.0
|
||
[Desktop Entry]
|
||
-Name=KPatience
|
||
+Name=KPatience ServiceRunnerTest
|
||
Exec=true -qwindowtitle %c %u
|
||
Type=Application
|
||
Icon=kpat
|
||
diff --git a/runners/services/autotests/servicerunnertest.cpp b/runners/services/autotests/servicerunnertest.cpp
|
||
index fcfd3275ac..b911667a3b 100644
|
||
--- a/runners/services/autotests/servicerunnertest.cpp
|
||
+++ b/runners/services/autotests/servicerunnertest.cpp
|
||
@@ -36,6 +36,10 @@ private Q_SLOTS:
|
||
void testINotifyUsage();
|
||
void testSpecialArgs();
|
||
void testEnv();
|
||
+ void testDisassociation();
|
||
+ void testMultipleKeywords();
|
||
+ void testMultipleNameWords();
|
||
+ void testDiscover();
|
||
};
|
||
|
||
void ServiceRunnerTest::initTestCase()
|
||
@@ -86,8 +90,8 @@ void ServiceRunnerTest::testExecutableExactMatch()
|
||
|
||
void ServiceRunnerTest::testKonsoleVsYakuakeComment()
|
||
{
|
||
- // Yakuake has konsole mentioned in comment, should be rated lower.
|
||
- const auto matches = launchQuery(QStringLiteral("kons"));
|
||
+ // Yakuake has konsole mentioned in comment, should not be listed (if it was it should be lower)
|
||
+ auto matches = launchQueryAndSort(QStringLiteral("kons"));
|
||
|
||
bool konsoleFound = false;
|
||
bool yakuakeFound = false;
|
||
@@ -97,17 +101,10 @@ void ServiceRunnerTest::testKonsoleVsYakuakeComment()
|
||
continue;
|
||
}
|
||
|
||
- if (match.text() == QLatin1String("Konsole ServiceRunnerTest")) {
|
||
- QCOMPARE(match.relevance(), 0.99);
|
||
- konsoleFound = true;
|
||
- } else if (match.text() == QLatin1String("Yakuake ServiceRunnerTest")) {
|
||
- // Rates lower because it doesn't have it in the name.
|
||
- QCOMPARE(match.relevance(), 0.59);
|
||
- yakuakeFound = true;
|
||
- }
|
||
- }
|
||
- QVERIFY(konsoleFound);
|
||
- QVERIFY(yakuakeFound);
|
||
+ QCOMPARE(texts,
|
||
+ QStringList({
|
||
+ u"Konsole ServiceRunnerTest"_s,
|
||
+ }));
|
||
}
|
||
|
||
void ServiceRunnerTest::testSystemSettings()
|
||
@@ -150,8 +147,9 @@ void ServiceRunnerTest::testSystemSettings2()
|
||
foreignSystemSettingsFound = true;
|
||
}
|
||
}
|
||
- QVERIFY(systemSettingsFound);
|
||
- QVERIFY(!foreignSystemSettingsFound);
|
||
+
|
||
+ // The matched texts will contain much more because of the generic search term. Make sure our settings win.
|
||
+ QCOMPARE(texts.at(0), u"System Settings ServiceRunnerTest"_s);
|
||
}
|
||
|
||
void ServiceRunnerTest::testCategories()
|
||
@@ -172,10 +170,6 @@ void ServiceRunnerTest::testCategories()
|
||
QVERIFY(std::none_of(matches.cbegin(), matches.cend(), [](const KRunner::QueryMatch &match) {
|
||
return match.text() == QLatin1String("Konsole ServiceRunnerTest");
|
||
}));
|
||
-
|
||
- // Query too short to match any category
|
||
- matches = launchQuery(QStringLiteral("Dumm"));
|
||
- QVERIFY(matches.isEmpty());
|
||
}
|
||
|
||
void ServiceRunnerTest::testJumpListActions()
|
||
@@ -234,6 +228,68 @@ void ServiceRunnerTest::testEnv()
|
||
}));
|
||
}
|
||
|
||
+void ServiceRunnerTest::testDisassociation()
|
||
+{
|
||
+ // This test makes sure that we do not associate a service with a query that is not relevant.
|
||
+ auto matches = launchQueryAndSort(u"new laptop com"_s); // particularly notorious because it has two three letter words; 'com' is an incomplete word
|
||
+
|
||
+ QStringList texts;
|
||
+ for (const auto &match : matches) {
|
||
+ texts.push_back(match.text());
|
||
+ }
|
||
+
|
||
+ QCOMPARE(texts, QStringList());
|
||
+}
|
||
+
|
||
+void ServiceRunnerTest::testMultipleKeywords()
|
||
+{
|
||
+ auto matches = launchQueryAndSort(u"text editor programming"_s);
|
||
+
|
||
+ QStringList texts;
|
||
+ for (const auto &match : matches) {
|
||
+ texts.push_back(match.text());
|
||
+ }
|
||
+
|
||
+ QCOMPARE(texts,
|
||
+ QStringList({
|
||
+ u"Kate ServiceRunnerTest"_s,
|
||
+ }));
|
||
+}
|
||
+
|
||
+void ServiceRunnerTest::testMultipleNameWords()
|
||
+{
|
||
+ auto matches = launchQueryAndSort(u"system settings"_s);
|
||
+
|
||
+ QStringList texts;
|
||
+ for (const auto &match : matches) {
|
||
+ if (!match.text().contains("ServiceRunnerTest"_L1)) {
|
||
+ continue;
|
||
+ }
|
||
+ texts.push_back(match.text());
|
||
+ }
|
||
+
|
||
+ QCOMPARE(texts,
|
||
+ QStringList({
|
||
+ u"System Settings ServiceRunnerTest"_s,
|
||
+ }));
|
||
+}
|
||
+
|
||
+void ServiceRunnerTest::testDiscover()
|
||
+{
|
||
+ auto matches = launchQueryAndSort(u"disco"_s);
|
||
+
|
||
+ QStringList texts;
|
||
+ for (const auto &match : matches) {
|
||
+ texts.push_back(match.text());
|
||
+ }
|
||
+
|
||
+ qDebug() << texts;
|
||
+ QCOMPARE(texts,
|
||
+ QStringList({
|
||
+ u"Discover ServiceRunnerTest"_s,
|
||
+ }));
|
||
+}
|
||
+
|
||
QTEST_MAIN(ServiceRunnerTest)
|
||
|
||
#include "servicerunnertest.moc"
|
||
diff --git a/runners/services/bitap.h b/runners/services/bitap.h
|
||
new file mode 100644
|
||
index 0000000000..a6aedb7eaf
|
||
--- /dev/null
|
||
+++ b/runners/services/bitap.h
|
||
@@ -0,0 +1,178 @@
|
||
+// SPDX-License-Identifier: LGPL-2.1-only OR LGPL-3.0-only OR LicenseRef-KDE-Accepted-LGPL
|
||
+// SPDX-FileCopyrightText: 2025 Harald Sitter <sitter@kde.org>
|
||
+
|
||
+#pragma once
|
||
+
|
||
+#include <bitset>
|
||
+#include <optional>
|
||
+
|
||
+#include <QDebug>
|
||
+#include <QLoggingCategory>
|
||
+#include <QString>
|
||
+
|
||
+namespace Bitap
|
||
+{
|
||
+
|
||
+Q_DECLARE_LOGGING_CATEGORY(BITAP)
|
||
+Q_LOGGING_CATEGORY(BITAP, "org.kde.plasma.runner.services.bitap", QtWarningMsg)
|
||
+
|
||
+struct Match {
|
||
+ qsizetype end;
|
||
+ qsizetype distance;
|
||
+
|
||
+ bool operator==(const Match &other) const = default;
|
||
+};
|
||
+
|
||
+inline QDebug operator<<(QDebug dbg, const Bitap::Match &match)
|
||
+{
|
||
+ dbg.nospace() << "Bitap::Match(" << match.end << ", " << match.distance << ")";
|
||
+ return dbg;
|
||
+}
|
||
+
|
||
+// Bitap is a bit of a complicated algorithm thanks to bitwise operations. I've opted to replace them with bitsets for readability.
|
||
+// It creates a patternMask based on all characters in the pattern. Basically each character gets assigned a representative bit.
|
||
+// e.g. in the pattern 'abc' the character 'a' would be 110, 'b' 101, 'c' 011.
|
||
+// This is a bit expensive up front but allows it to carry out everything else using bitwise operations.
|
||
+// For each match we set a matching bit in the bits vector.
|
||
+// Matching happens within a maximum edit distance: despite its name, `hammingDistance` bounds the number of substitutions, insertions, deletions and adjacent transpositions, not a strict Hamming distance.
|
||
+inline std::optional<Match> bitap(const QStringView &name, const QStringView &pattern, int hammingDistance)
|
||
+{
|
||
+ qCDebug(BITAP) << "Bitap called with name:" << name << "and pattern:" << pattern << "with hamming distance:" << hammingDistance;
|
||
+ const auto patternEndIndex = pattern.size() - 1;
|
||
+ if (name == pattern) {
|
||
+ return Match{.end = patternEndIndex, .distance = 0}; // Perfect match
|
||
+ }
|
||
+
|
||
+ if (pattern.isEmpty() || name.isEmpty()) {
|
||
+ return std::nullopt;
|
||
+ }
|
||
+
|
||
+ // Being a bitset we could have any number of bits, but practically we probably don't need more than 64, most bitaps I've seen even use 32.
|
||
+ constexpr auto maxMaskBits = 64;
|
||
+ using Mask = std::bitset<maxMaskBits>;
|
||
+ using PatternMask = std::array<Mask, std::numeric_limits<char16_t>::max() + 1>; // + 1: every UTF-16 code unit 0..0xFFFF must be a valid index, otherwise .at() throws for U+FFFF
|
||
+
|
||
+ // The way bitap works is that each bit of the Mask represents a character position. Because of this we cannot match
|
||
+ // more characters than we have bits for.
|
||
+ // -1 because one bit is used for the result (I think)
|
||
+ if (pattern.size() >= qsizetype(Mask().size()) - 1) {
|
||
+ qCWarning(BITAP) << "Pattern is too long for bitap algorithm, max length is" << Mask().size() - 1;
|
||
+ return std::nullopt;
|
||
+ }
|
||
+
|
||
+ const PatternMask patternMask = [&pattern, &name] {
|
||
+ PatternMask patternMask;
|
||
+ // The following is an optimized version of patternMask.fill(Mask().set()); to set all **necessary** bits to 1.
|
||
+ for (const auto &qchar : pattern) {
|
||
+ patternMask.at(qchar.unicode()).set();
|
||
+ }
|
||
+ for (const auto &qchar : name) {
|
||
+ patternMask.at(qchar.unicode()).set();
|
||
+ }
|
||
+
|
||
+ for (int i = 0; i < pattern.size(); ++i) {
|
||
+ const auto char_ = pattern.at(i).unicode();
|
||
+ patternMask.at(char_).reset(i); // unset the relevant index bits
|
||
+ }
|
||
+
|
||
+ if (BITAP().isDebugEnabled()) {
|
||
+ for (const auto &i : pattern) {
|
||
+ const auto char_ = i.unicode();
|
||
+ qCDebug(BITAP) << "Pattern mask for" << char_ << "is" << patternMask.at(char_).to_string();
|
||
+ }
|
||
+ }
|
||
+
|
||
+ return patternMask;
|
||
+ }();
|
||
+
|
||
+ Match match{
|
||
+ .end = -1, // -1 means no match found for convenience
|
||
+ .distance = name.size(),
|
||
+ };
|
||
+
|
||
+ std::vector<Mask> bits((hammingDistance + 1), Mask().set().reset(0));
|
||
+ std::vector<Mask> transpositions(bits.cbegin(), bits.cend());
|
||
+ for (int i = 0; i < name.size(); ++i) {
|
||
+ const auto &char_ = name.at(i);
|
||
+ auto previousBit = bits[0];
|
||
+ const auto mask = patternMask.at(char_.unicode());
|
||
+ bits[0] |= mask;
|
||
+ bits[0] <<= 1;
|
||
+
|
||
+ for (int j = 1; j <= hammingDistance; ++j) {
|
||
+ auto bit = bits[j];
|
||
+ auto current = (bit | mask) << 1;
|
||
+ // https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance
|
||
+ auto substitute = previousBit << 1;
|
||
+ auto delete_ = bits[j - 1] << 1;
|
||
+ auto insert = previousBit;
|
||
+ auto transpose = (transpositions[j - 1] | (mask << 1)) << 1;
|
||
+ bits[j] = current & substitute & transpose & delete_ & insert;
|
||
+ transpositions[j - 1] = (previousBit << 1) | mask;
|
||
+ previousBit = bit;
|
||
+ }
|
||
+
|
||
+ if (BITAP().isDebugEnabled()) {
|
||
+ qCDebug(BITAP) << "After processing character" << char_ << "at index" << i;
|
||
+ for (const auto &bit : bits) {
|
||
+ qCDebug(BITAP) << "bit" << bit.to_string();
|
||
+ }
|
||
+ }
|
||
+
|
||
+ for (int k = 0; k <= hammingDistance; ++k) {
|
||
+ // If the bit at the end of the mask is 0, it means we have a match.
|
||
+ if (0 == (bits[k] & Mask().set(pattern.size()))) {
|
||
+ if (k < match.distance && match.end < i) {
|
||
+ qCDebug(BITAP) << "Match found at index" << i << "with hamming distance" << k << "better than previous match with distance"
|
||
+ << match.distance << "at index" << match.end;
|
||
+ match = {
|
||
+ .end = i,
|
||
+ .distance = k,
|
||
+ };
|
||
+ }
|
||
+ // We do not return early because we want to find the best match, not just any.
|
||
+ // e.g. with a maximum distance of 1 `disc` could match `disc` either at index two with distance one, or at index three with distance zero.
|
||
+ }
|
||
+ }
|
||
+ }
|
||
+
|
||
+ // Because we use a complete Damerau–Levenshtein distance the return value is a bit complicated. The trick is that the distance incurs a negative penalty
|
||
+ // in relation to the max distance, while an end that is closer to the real end is generally favorable. Combining the two into a single value
|
||
+ // would complicate the meaning of the return value to mean "approximate end with random penalty". This is garbage to reason about so instead we return
|
||
+ // both values and then assign them meaning in the score function.
|
||
+ if (match.end != -1) {
|
||
+ return match;
|
||
+ }
|
||
+
|
||
+ qCDebug(BITAP) << "No match found for pattern" << pattern << "in name" << name;
|
||
+ return std::nullopt;
|
||
+}
|
||
+
|
||
+inline qreal score(const QStringView &name, const auto &match, auto hammingDistance)
|
||
+{
|
||
+ // Normalize the score to a value between 0.0 and 1.0
|
||
+ // No distance means the score is directly correlated to the end index. The more characters matched the higher the score.
|
||
+ // Any distance will lower the score by a sub 0.1 margin.
|
||
+
|
||
+ if (name.size() == 0) {
|
||
+ return 0.0; // No name, no score.
|
||
+ }
|
||
+
|
||
+ const auto maxEnd = name.size() - 1;
|
||
+ const auto penalty = [&] {
|
||
+ if (hammingDistance <= 0) {
|
||
+ return match.distance > 0 ? 1.0 : 0.0; // Exact matching requested: no penalty for a distance-free match, full penalty (score clamps to 0) otherwise
|
||
+ }
|
||
+ constexpr auto tenth = 10.0;
|
||
+ constexpr auto half = 2.0; // together with tenth this caps the penalty at 0.05 when distance == hammingDistance
|
||
+ return qreal(match.distance) / qreal(hammingDistance) / tenth / half;
|
||
+ }();
|
||
+ auto score = qreal(match.end) / qreal(maxEnd);
|
||
+ // Prevent underflows when the penalty is larger than the score.
|
||
+ score = std::max(0.0, score - penalty);
|
||
+
|
||
+ Q_ASSERT(score >= 0.0 && score <= 1.0);
|
||
+ return score;
|
||
+}
|
||
+
|
||
+} // namespace Bitap
|
||
diff --git a/runners/services/levenshtein.h b/runners/services/levenshtein.h
|
||
new file mode 100644
|
||
index 0000000000..0efb960be3
|
||
--- /dev/null
|
||
+++ b/runners/services/levenshtein.h
|
||
@@ -0,0 +1,58 @@
|
||
+// SPDX-License-Identifier: LGPL-2.1-only OR LGPL-3.0-only OR LicenseRef-KDE-Accepted-LGPL
|
||
+// SPDX-FileCopyrightText: 2025 Harald Sitter <sitter@kde.org>
|
||
+
|
||
+#pragma once
|
||
+
|
||
+#include <QLoggingCategory>
|
||
+#include <QString>
|
||
+
|
||
+namespace Levenshtein
|
||
+{
|
||
+
|
||
+inline int distance(const QStringView &name, const QStringView &query)
|
||
+{
|
||
+ if (name == query) {
|
||
+ return 0;
|
||
+ }
|
||
+
|
||
+ std::vector<int> distance0(query.size() + 1, 0);
|
||
+ std::vector<int> distance1(query.size() + 1, 0);
|
||
+
|
||
+ for (int i = 0; i <= query.size(); ++i) {
|
||
+ distance0[i] = i;
|
||
+ }
|
||
+
|
||
+ for (int i = 0; i < name.size(); ++i) {
|
||
+ distance1[0] = i + 1;
|
||
+ for (int j = 0; j < query.size(); ++j) {
|
||
+ const auto deletionCost = distance0[j + 1] + 1;
|
||
+ const auto insertionCost = distance1[j] + 1;
|
||
+ const auto substitutionCost = [&] {
|
||
+ if (name[i] == query[j]) {
|
||
+ return distance0[j];
|
||
+ }
|
||
+ return distance0[j] + 1;
|
||
+ }();
|
||
+ distance1[j + 1] = std::min({deletionCost, insertionCost, substitutionCost});
|
||
+ }
|
||
+ std::swap(distance0, distance1);
|
||
+ }
|
||
+ return distance0[query.size()];
|
||
+}
|
||
+
|
||
+inline qreal score(const QStringView &name, int distance)
|
||
+{
|
||
+ // Normalize the distance to a value between 0.0 and 1.0
|
||
+ // The maximum distance is the length of the pattern.
|
||
+ // If the distance is 0, it means a perfect match, so we return 1.0.
|
||
+ // If the distance is equal to the length of the pattern, we return 0.0.
|
||
+ if (distance == 0) {
|
||
+ return 1.0;
|
||
+ }
|
||
+ if (distance >= name.size()) {
|
||
+ return 0.0;
|
||
+ }
|
||
+ return 1.0 - (qreal(distance) / qreal(name.size()));
|
||
+}
|
||
+
|
||
+} // namespace Levenshtein
|
||
diff --git a/runners/services/servicerunner.cpp b/runners/services/servicerunner.cpp
|
||
index eb9f02e74b..3d5de8feb2 100644
|
||
--- a/runners/services/servicerunner.cpp
|
||
+++ b/runners/services/servicerunner.cpp
|
||
@@ -1,7 +1,7 @@
|
||
/*
|
||
SPDX-FileCopyrightText: 2006 Aaron Seigo <aseigo@kde.org>
|
||
SPDX-FileCopyrightText: 2014 Vishesh Handa <vhanda@kde.org>
|
||
- SPDX-FileCopyrightText: 2016-2020 Harald Sitter <sitter@kde.org>
|
||
+ SPDX-FileCopyrightText: 2016-2025 Harald Sitter <sitter@kde.org>
|
||
SPDX-FileCopyrightText: 2022-2023 Alexander Lohnau <alexander.lohnau@gmx.de>
|
||
|
||
SPDX-License-Identifier: LGPL-2.0-only
|
||
@@ -21,6 +21,7 @@
|
||
#include <QUrlQuery>
|
||
|
||
#include <KApplicationTrader>
|
||
+#include <KFuzzyMatcher>
|
||
#include <KLocalizedString>
|
||
#include <KNotificationJobUiDelegate>
|
||
#include <KServiceAction>
|
||
@@ -35,22 +36,130 @@
|
||
#include <KIO/ApplicationLauncherJob>
|
||
#include <KIO/DesktopExecParser>
|
||
|
||
+#include "bitap.h"
|
||
#include "debug.h"
|
||
+#include "levenshtein.h"
|
||
|
||
using namespace Qt::StringLiterals;
|
||
namespace
|
||
{
|
||
|
||
-int weightedLength(const QString &query)
|
||
+struct Score {
|
||
+ qreal value = 0.0; // The final score, it is the sum of all scores.
|
||
+ KRunner::QueryMatch::CategoryRelevance categoryRelevance = KRunner::QueryMatch::CategoryRelevance::Lowest; // The category relevance of the match.
|
||
+};
|
||
+
|
||
+struct ScoreCard {
|
||
+ Bitap::Match bitap;
|
||
+ qreal bitapScore;
|
||
+ int levenshtein;
|
||
+ qreal levenshteinScore;
|
||
+};
|
||
+
|
||
+QDebug operator<<(QDebug dbg, const ScoreCard &card)
|
||
{
|
||
- return KStringHandler::logicalLength(query);
|
||
+ dbg.nospace() << "Scorecard(" << "bitap: " << card.bitap << ", bitapScore: " << card.bitapScore << ", levenshtein: " << card.levenshtein
|
||
+ << ", levenshteinScore: " << card.levenshteinScore << ")";
|
||
+ return dbg;
|
||
}
|
||
|
||
-inline bool contains(const QString &result, const QList<QStringView> &queryList)
|
||
+using ScoreCards = std::vector<ScoreCard>;
|
||
+
|
||
+struct WeightedScoreCard {
|
||
+ ScoreCards cards;
|
||
+ qreal weight;
|
||
+};
|
||
+
|
||
+QDebug operator<<(QDebug dbg, const WeightedScoreCard &card)
|
||
{
|
||
- return std::ranges::all_of(queryList, [&result](QStringView query) {
|
||
- return result.contains(query, Qt::CaseInsensitive);
|
||
- });
|
||
+
|
||
+ dbg.nospace() << "WeightedCard[";
|
||
+ for (const auto &scoreCard : card.cards) {
|
||
+ dbg.nospace() << scoreCard;
|
||
+ if (&scoreCard != &card.cards.back()) {
|
||
+ dbg.nospace() << ", ";
|
||
+ }
|
||
+ }
|
||
+ dbg.nospace() << "]";
|
||
+ return dbg;
|
||
+}
|
||
+
|
||
+auto makeScores(const auto &notNormalizedString, const auto &queryList) {
|
||
+ if (notNormalizedString.isEmpty()) {
|
||
+ return ScoreCards{}; // No string, no score.
|
||
+ }
|
||
+
|
||
+ const auto string = notNormalizedString.toLower();
|
||
+
|
||
+ ScoreCards cards;
|
||
+ for (const auto &queryItem : queryList) {
|
||
+ constexpr auto maxDistance = 1;
|
||
+ const auto bitap = Bitap::bitap(string, queryItem, maxDistance);
|
||
+ if (!bitap) {
|
||
+ // One of the query items didn't match. This means the entire query is not a match
|
||
+ return ScoreCards{};
|
||
+ }
|
||
+
|
||
+ const auto bitapScore = Bitap::score(string, bitap.value(), maxDistance);
|
||
+
|
||
+ // Mind that we give different levels of bonus. This is important to imply ordering within competing matches of the same "type".
|
||
+ // If we perfectly match that gives a bonus for not requiring any changes.
|
||
+ const auto noSubstitionBonus = Bitap::score(string, bitap.value(), 0) == 1.0 ? 4.0 : 1.0;
|
||
+ // If we match the entire length of the string that gets a bonus (disregarding distance, that was considered above).
|
||
+ const auto completeMatchBonus = bitap->end >= (queryItem.size() - 1) ? 3.0 : 1.0;
|
||
+ // If the string starts with the query item that gets a bonus.
|
||
+ const auto startsWithBonus = (string.startsWith(queryItem, Qt::CaseInsensitive)) ? 2.0 : 1.0;
|
||
+
|
||
+ // Also consider the distance between the input and the query item.
|
||
+ // If one is "yolotrollingservice" and the other is "yolo" then we must consider them worse matches than say "yolotroll".
|
||
+ const auto levenshtein = Levenshtein::distance(string, queryItem);
|
||
+
|
||
+ cards.emplace_back(ScoreCard{
|
||
+ .bitap = *bitap,
|
||
+ .bitapScore = bitapScore + completeMatchBonus + noSubstitionBonus + startsWithBonus,
|
||
+ .levenshtein = levenshtein,
|
||
+ .levenshteinScore = Levenshtein::score(string, levenshtein),
|
||
+ });
|
||
+ }
|
||
+
|
||
+ return cards;
|
||
+};
|
||
+
|
||
+
|
||
+auto makeScoreFromList(const auto &queryList, const QStringList &strings) {
|
||
+ // This turns the loop inside out. For every query item we must find a match in our keywords or we discard
|
||
+ ScoreCards cards;
|
||
+ // e.g. text,editor,programming
|
||
+ for (const auto &queryItem : queryList) {
|
||
+ // e.g. text;txt;editor;programming;programmer;development;developer;code;
|
||
+ auto found = false;
|
||
+ ScoreCards queryCards;
|
||
+ for (const auto &string : strings) {
|
||
+ auto stringCards = makeScores(string, QList{queryItem});
|
||
+ if (stringCards.empty()) {
|
||
+ continue; // The combination didn't match.
|
||
+ }
|
||
+ for (auto &scoreCard : stringCards) {
|
||
+ if (scoreCard.levenshteinScore < 0.8) {
|
||
+ continue; // Not a good match, skip it. We are very strict with keywords
|
||
+ }
|
||
+ found = true;
|
||
+ queryCards.append_range(stringCards);
|
||
+ }
|
||
+ // We do not break because other strings might also match, improving the score.
|
||
+ }
|
||
+ if (!found) {
|
||
+ // No item in strings matched the query item. This means the entire query is not a match.
|
||
+ return ScoreCards{};
|
||
+ }
|
||
+ cards.append_range(queryCards);
|
||
+ }
|
||
+ return cards;
|
||
+};
|
||
+
|
||
+int weightedLength(const QString &query)
|
||
+{
|
||
+ return KStringHandler::logicalLength(query);
|
||
}
|
||
|
||
inline bool contains(const QStringList &results, const QList<QStringView> &queryList)
|
||
@@ -79,7 +188,7 @@ public:
|
||
|
||
void match(KRunner::RunnerContext &context)
|
||
{
|
||
- query = context.query();
|
||
+ query = context.query().toLower();
|
||
// Splitting the query term to match using subsequences
|
||
queryList = QStringView(query).split(QLatin1Char(' '));
|
||
weightedTermLength = weightedLength(query);
|
||
@@ -120,36 +229,6 @@ private:
|
||
return ret;
|
||
}
|
||
|
||
- enum class Category {
|
||
- Name,
|
||
- GenericName,
|
||
- Comment,
|
||
- };
|
||
- qreal increaseMatchRelevance(const QString &serviceProperty, const QList<QStringView> &strList, Category category)
|
||
- {
|
||
- // Increment the relevance based on all the words (other than the first) of the query list
|
||
- qreal relevanceIncrement = 0;
|
||
-
|
||
- for (int i = 1; i < strList.size(); ++i) {
|
||
- const auto &str = strList.at(i);
|
||
- if (category == Category::Name) {
|
||
- if (serviceProperty.contains(str, Qt::CaseInsensitive)) {
|
||
- relevanceIncrement += 0.01;
|
||
- }
|
||
- } else if (category == Category::GenericName) {
|
||
- if (serviceProperty.contains(str, Qt::CaseInsensitive)) {
|
||
- relevanceIncrement += 0.01;
|
||
- }
|
||
- } else if (category == Category::Comment) {
|
||
- if (serviceProperty.contains(str, Qt::CaseInsensitive)) {
|
||
- relevanceIncrement += 0.01;
|
||
- }
|
||
- }
|
||
- }
|
||
-
|
||
- return relevanceIncrement;
|
||
- }
|
||
-
|
||
void setupMatch(const KService::Ptr &service, KRunner::QueryMatch &match)
|
||
{
|
||
const QString name = service->name();
|
||
@@ -219,96 +298,77 @@ private:
|
||
return resultingArgs.join(QLatin1Char(' '));
|
||
}
|
||
|
||
- void matchNameKeywordAndGenericName()
|
||
+ [[nodiscard]] std::optional<Score> fuzzyScore(KService::Ptr service)
|
||
{
|
||
- const auto nameKeywordAndGenericNameFilter = [this](const KService::Ptr &service) {
|
||
- // Name
|
||
- if (contains(service->name(), queryList)) {
|
||
- return true;
|
||
- }
|
||
- // If the term length is < 3, no real point searching the untranslated Name, Keywords and GenericName
|
||
- if (weightedTermLength < 3) {
|
||
- return false;
|
||
- }
|
||
- if (contains(service->untranslatedName(), queryList)) {
|
||
- return true;
|
||
- }
|
||
+ if (queryList.isEmpty()) {
|
||
+ return std::nullopt; // No query, no score.
|
||
+ }
|
||
+
|
||
+ const auto name = service->name();
|
||
+ if (name.compare(query, Qt::CaseInsensitive) == 0) {
|
||
+ // Absolute match. Can't get any better than this.
|
||
+ return Score{.value = std::numeric_limits<decltype(Score::value)>::max(), .categoryRelevance = KRunner::QueryMatch::CategoryRelevance::Highest};
|
||
+ }
|
||
|
||
- // Keywords
|
||
- if (contains(service->keywords(), queryList)) {
|
||
- return true;
|
||
+ std::array<WeightedScoreCard, 4> weightedCards = {
|
||
+ WeightedScoreCard{.cards = makeScores(name, queryList), .weight = 1.0},
|
||
+ WeightedScoreCard{.cards = makeScores(service->untranslatedName(), queryList), .weight = 0.8},
|
||
+ WeightedScoreCard{.cards = makeScores(service->genericName(), queryList), .weight = 0.6},
|
||
+ WeightedScoreCard{.cards = makeScoreFromList(queryList, service->keywords()), .weight = 0.1},
|
||
+ };
|
||
+
|
||
+ if (RUNNER_SERVICES().isDebugEnabled()) {
|
||
+ qCDebug(RUNNER_SERVICES) << "+++++++ Weighted Cards for" << name;
|
||
+ for (const auto &weightedCard : weightedCards) {
|
||
+ qCDebug(RUNNER_SERVICES) << weightedCard;
|
||
}
|
||
- // GenericName
|
||
- if (contains(service->genericName(), queryList) || contains(service->untranslatedGenericName(), queryList)) {
|
||
- return true;
|
||
+ qCDebug(RUNNER_SERVICES) << "-------";
|
||
+ }
|
||
+
|
||
+ int scores = 1; // starts at 1 to avoid division by zero
|
||
+ qreal finalScore = 0.0;
|
||
+ for (const auto &weightedCard : weightedCards) {
|
||
+ if (weightedCard.cards.empty()) {
|
||
+ continue; // No scores, no match.
|
||
}
|
||
- // Comment
|
||
- if (contains(service->comment(), queryList)) {
|
||
- return true;
|
||
+
|
||
+ qreal weightedScore = 0.0;
|
||
+ for (const auto &scoreCard : weightedCard.cards) {
|
||
+ weightedScore += (scoreCard.bitapScore + scoreCard.levenshteinScore) * weightedCard.weight;
|
||
+ scores++;
|
||
}
|
||
|
||
- return false;
|
||
- };
|
||
+ finalScore += weightedScore;
|
||
+ }
|
||
+ finalScore = finalScore / scores; // Average across all contributing score cards; scores starts at 1, so the divisor is the card count + 1
|
||
|
||
- for (const KService::Ptr &service : m_services) {
|
||
- if (!nameKeywordAndGenericNameFilter(service) || disqualify(service)) {
|
||
- continue;
|
||
- }
|
||
+ qCDebug(RUNNER_SERVICES) << "Final score for" << name << "is" << finalScore;
|
||
+ if (finalScore > 0.0) {
|
||
+ return Score{.value = finalScore, .categoryRelevance = KRunner::QueryMatch::CategoryRelevance::Moderate};
|
||
+ }
|
||
|
||
- const QString id = service->storageId();
|
||
- const QString name = service->name();
|
||
+ return std::nullopt;
|
||
+ }
|
||
|
||
- KRunner::QueryMatch::CategoryRelevance categoryRelevance = KRunner::QueryMatch::CategoryRelevance::Moderate;
|
||
- qreal relevance(0.6);
|
||
+ void matchNameKeywordAndGenericName()
|
||
+ {
|
||
+ static auto isTest = QStandardPaths::isTestModeEnabled();
|
||
|
||
- // If the term was < 3 chars and NOT at the beginning of the App's name, then chances are the user doesn't want that app
|
||
- if (weightedTermLength < 3) {
|
||
- if (name.startsWith(query, Qt::CaseInsensitive)) {
|
||
- relevance = 0.9;
|
||
- } else {
|
||
- continue;
|
||
- }
|
||
- } else if (name.compare(query, Qt::CaseInsensitive) == 0) {
|
||
- relevance = 1;
|
||
- categoryRelevance = KRunner::QueryMatch::CategoryRelevance::Highest;
|
||
- } else if (const auto idx = name.indexOf(queryList[0], 0, Qt::CaseInsensitive); idx != -1) {
|
||
- relevance = 0.8;
|
||
- relevance += increaseMatchRelevance(name, queryList, Category::Name);
|
||
- if (idx == 0) {
|
||
- relevance += 0.1;
|
||
- categoryRelevance = KRunner::QueryMatch::CategoryRelevance::High;
|
||
- }
|
||
- } else if (const auto idx = service->genericName().indexOf(queryList[0], 0, Qt::CaseInsensitive); idx != -1) {
|
||
- relevance = 0.65;
|
||
- relevance += increaseMatchRelevance(service->genericName(), queryList, Category::GenericName);
|
||
- if (idx == 0) {
|
||
- relevance += 0.05;
|
||
- }
|
||
- } else if (const auto idx = service->comment().indexOf(queryList[0], 0, Qt::CaseInsensitive); idx != -1) {
|
||
- relevance = 0.5;
|
||
- relevance += increaseMatchRelevance(service->comment(), queryList, Category::Comment);
|
||
- if (idx == 0) {
|
||
- relevance += 0.05;
|
||
- }
|
||
+ for (const KService::Ptr &service : m_services) {
|
||
+ if (isTest && !service->name().contains("ServiceRunnerTest"_L1)) {
|
||
+ continue; // Skip services that are not part of the test.
|
||
}
|
||
|
||
KRunner::QueryMatch match(m_runner);
|
||
- match.setCategoryRelevance(categoryRelevance);
|
||
- setupMatch(service, match);
|
||
- if (service->categories().contains(QLatin1String("KDE"))) {
|
||
- qCDebug(RUNNER_SERVICES) << "found a kde thing" << id << match.subtext() << relevance;
|
||
- relevance += .09;
|
||
- }
|
||
-
|
||
- if (const auto foundIt = m_runner->m_favorites.constFind(service->desktopEntryName()); foundIt != m_runner->m_favorites.cend()) {
|
||
- if (foundIt->isGlobal || foundIt->linkedActivities.contains(m_currentActivity)) {
|
||
- qCDebug(RUNNER_SERVICES) << "entry is a favorite" << id << match.subtext() << relevance;
|
||
- relevance *= 1.25; // Give favorites a relative boost,
|
||
- }
|
||
+ auto score = fuzzyScore(service);
|
||
+ if (!score || disqualify(service)) {
|
||
+ continue;
|
||
}
|
||
|
||
- qCDebug(RUNNER_SERVICES) << name << "is this relevant:" << relevance;
|
||
- match.setRelevance(relevance);
|
||
+ setupMatch(service, match);
|
||
+ match.setCategoryRelevance(score->categoryRelevance);
|
||
+ match.setRelevance(score->value);
|
||
+ qCDebug(RUNNER_SERVICES) << match.text() << "is this relevant:" << match.relevance() << "category relevance" << match.categoryRelevance();
|
||
|
||
matches << match;
|
||
}
|
||
--
|
||
2.51.0
|
||
|