From 312c215e717654e55fa48ec968f412201d2a5544 Mon Sep 17 00:00:00 2001 From: Harald Sitter Date: Mon, 14 Jul 2025 17:28:14 +0200 Subject: [PATCH] servicerunner: fuzzy match use a bitap implementation instead of doing awkward contains dances. this should lead to somewhat more reliable results, which are now more comprehensively asserted in the unit test at the heart of this is a new fuzzyScore function that assigns a score to a service vis a vis a query. this score is adjusted depending on which field it is regarding (name > genericname > keywords). this should hopefully ensure that a match against name outweighs most other matches. all scores are eventually assembled into a final score that gets used as match relevance --- runners/services/autotests/CMakeLists.txt | 3 + runners/services/autotests/bitaptest.cpp | 70 +++++ .../autotests/fixtures/audacity.desktop | 2 +- .../fixtures/org.kde.discover.desktop | 17 ++ .../autotests/fixtures/org.kde.kpat.desktop | 2 +- .../services/autotests/servicerunnertest.cpp | 94 ++++-- runners/services/bitap.h | 178 +++++++++++ runners/services/levenshtein.h | 58 ++++ runners/services/servicerunner.cpp | 286 +++++++++++------- 9 files changed, 576 insertions(+), 134 deletions(-) create mode 100644 runners/services/autotests/bitaptest.cpp create mode 100755 runners/services/autotests/fixtures/org.kde.discover.desktop create mode 100644 runners/services/bitap.h create mode 100644 runners/services/levenshtein.h diff --git a/runners/services/autotests/CMakeLists.txt b/runners/services/autotests/CMakeLists.txt index 04849a2928..ff7ec66634 100644 --- a/runners/services/autotests/CMakeLists.txt +++ b/runners/services/autotests/CMakeLists.txt @@ -6,3 +6,6 @@ remove_definitions(-DQT_NO_CAST_FROM_ASCII) ecm_add_test(servicerunnertest.cpp TEST_NAME servicerunnertest LINK_LIBRARIES Qt::Test KF6::Service KF6::Runner) krunner_configure_test(servicerunnertest krunner_services) + +ecm_add_test(bitaptest.cpp TEST_NAME bitaptest + LINK_LIBRARIES 
Qt::Test) diff --git a/runners/services/autotests/bitaptest.cpp b/runners/services/autotests/bitaptest.cpp new file mode 100644 index 0000000000..1a1cb856ec --- /dev/null +++ b/runners/services/autotests/bitaptest.cpp @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: LGPL-2.1-only OR LGPL-3.0-only OR LicenseRef-KDE-Accepted-LGPL +// SPDX-FileCopyrightText: 2025 Harald Sitter + +#include +#include +#include +#include +#include +#include +#include + +#include "../bitap.h" + +class BitapTest : public QObject +{ + Q_OBJECT +private Q_SLOTS: + void initTestCase() + { + } + void cleanupTestCase() + { + } + + void testBitap() + { + using namespace Bitap; + // The macro has trouble with designated initializers, so we wrap them in (). + QCOMPARE(bitap(u"hello world", u"hello", 1), (Match{.end = 4, .distance = 0})); + QCOMPARE(bitap(u"wireshark", u"di", 1), (Match{.end = 1, .distance = 1})); + QCOMPARE(bitap(u"discover", u"disk", 1), (Match{.end = 2, .distance = 1})); + QCOMPARE(bitap(u"discover", u"disc", 1), (Match{.end = 3, .distance = 0})); + QCOMPARE(bitap(u"discover", u"scov", 1), (Match{.end = 5, .distance = 0})); + QCOMPARE(bitap(u"discover", u"diki", 1), std::nullopt); + QCOMPARE(bitap(u"discover", u"obo", 1), std::nullopt); + // With a hamming distance of 1 this may match because it is a single transposition. + QCOMPARE(bitap(u"discover", u"dicsover", 1), (Match{.end = 7, .distance = 1})); + // … but with three characters out of place things should not match. + QCOMPARE(bitap(u"discover", u"dicosver", 1), std::nullopt); + // pattern too long + QCOMPARE(bitap(u"discover", u" !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~", 1), std::nullopt); + // This is not a transposition as per Damerau–Levenshtein distance because the characters are not adjacent. 
+ QCOMPARE(bitap(u"steam", u"skeap", 1), std::nullopt); + // Deletion required + QCOMPARE(bitap(u"discover", u"discover", 1), (Match{.end = 7, .distance = 0})); + QCOMPARE(bitap(u"discover", u"discovery", 1), (Match{.end = 7, .distance = 1})); + // Insertion required + QCOMPARE(bitap(u"discover", u"dicover", 1), (Match{.end = 7, .distance = 1})); + } + + void testScore() + { + using namespace Bitap; + // aperfectten has 10 big beautiful indexes. The maximum end is therefore 10. + QCOMPARE(score(u"aperfectten", Match{.end = 10, .distance = 0}, 1), 1.0); + QCOMPARE(score(u"aperfectten", Match{.end = 4, .distance = 0}, 1), 0.4); + QCOMPARE(score(u"aperfectten", Match{.end = 4, .distance = 1}, 1), 0.35); + QCOMPARE(score(u"aperfectten", Match{.end = 0, .distance = 0}, 0), 0); + QCOMPARE(score(u"aperfectten", Match{.end = 0, .distance = 0}, 1), 0); + QCOMPARE(score(u"aperfectten", Match{.end = 1, .distance = 1}, 1), 0.05); + + QCOMPARE(score(u"abc", Match{.end = 2, .distance = 1}, 1), 0.95); + // Ask for distance 0 but it has a distance so this is a super bad match. 
+ QCOMPARE(score(u"abc", Match{.end = 2, .distance = 1}, 0), 0); + } +}; + +QTEST_MAIN(BitapTest) + +#include "bitaptest.moc" diff --git a/runners/services/autotests/fixtures/audacity.desktop b/runners/services/autotests/fixtures/audacity.desktop index 7613d9f32f..05e1b9d929 100644 --- a/runners/services/autotests/fixtures/audacity.desktop +++ b/runners/services/autotests/fixtures/audacity.desktop @@ -1,5 +1,5 @@ [Desktop Entry] -Name=Audacity +Name=Audacity ServiceRunnerTest GenericName=Sound Editor Comment=Record and edit audio files Keywords=audio;sound;alsa;jack;editor; diff --git a/runners/services/autotests/fixtures/org.kde.discover.desktop b/runners/services/autotests/fixtures/org.kde.discover.desktop new file mode 100755 index 0000000000..978b2b4152 --- /dev/null +++ b/runners/services/autotests/fixtures/org.kde.discover.desktop @@ -0,0 +1,17 @@ +# SPDX-FileCopyrightText: None +# SPDX-License-Identifier: CC0-1.0 +[Desktop Entry] +Name=Discover ServiceRunnerTest +Comment=Install and remove apps and add-ons +MimeType=application/vnd.flatpak;application/vnd.flatpak.repo;application/vnd.flatpak.ref; +Exec=plasma-discover %F +Icon=plasmadiscover +Type=Application +X-DocPath=plasma-discover/index.html +InitialPreference=5 +NoDisplay=false +Actions=Updates; +SingleMainWindow=true +GenericName=Software Center +Categories=Qt;KDE;System; +Keywords=program;software;store;repository;package;add;install;uninstall;remove;update;apps;applications;games;flatpak;snap;addons;add-ons;firmware; diff --git a/runners/services/autotests/fixtures/org.kde.kpat.desktop b/runners/services/autotests/fixtures/org.kde.kpat.desktop index 71d7fd2a89..3a91d89afe 100644 --- a/runners/services/autotests/fixtures/org.kde.kpat.desktop +++ b/runners/services/autotests/fixtures/org.kde.kpat.desktop @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: 2022 Alexander Lohnau # SPDX-License-Identifier: CC0-1.0 [Desktop Entry] -Name=KPatience +Name=KPatience ServiceRunnerTest Exec=true -qwindowtitle %c %u 
Type=Application Icon=kpat diff --git a/runners/services/autotests/servicerunnertest.cpp b/runners/services/autotests/servicerunnertest.cpp index fcfd3275ac..b911667a3b 100644 --- a/runners/services/autotests/servicerunnertest.cpp +++ b/runners/services/autotests/servicerunnertest.cpp @@ -36,6 +36,10 @@ private Q_SLOTS: void testINotifyUsage(); void testSpecialArgs(); void testEnv(); + void testDisassociation(); + void testMultipleKeywords(); + void testMultipleNameWords(); + void testDiscover(); }; void ServiceRunnerTest::initTestCase() @@ -86,8 +90,8 @@ void ServiceRunnerTest::testExecutableExactMatch() void ServiceRunnerTest::testKonsoleVsYakuakeComment() { - // Yakuake has konsole mentioned in comment, should be rated lower. - const auto matches = launchQuery(QStringLiteral("kons")); + // Yakuake has konsole mentioned in comment, should not be listed (if it was it should be lower) + auto matches = launchQueryAndSort(QStringLiteral("kons")); bool konsoleFound = false; bool yakuakeFound = false; @@ -97,17 +101,10 @@ void ServiceRunnerTest::testKonsoleVsYakuakeComment() continue; } - if (match.text() == QLatin1String("Konsole ServiceRunnerTest")) { - QCOMPARE(match.relevance(), 0.99); - konsoleFound = true; - } else if (match.text() == QLatin1String("Yakuake ServiceRunnerTest")) { - // Rates lower because it doesn't have it in the name. - QCOMPARE(match.relevance(), 0.59); - yakuakeFound = true; - } - } - QVERIFY(konsoleFound); - QVERIFY(yakuakeFound); + QCOMPARE(texts, + QStringList({ + u"Konsole ServiceRunnerTest"_s, + })); } void ServiceRunnerTest::testSystemSettings() @@ -150,8 +147,9 @@ void ServiceRunnerTest::testSystemSettings2() foreignSystemSettingsFound = true; } } - QVERIFY(systemSettingsFound); - QVERIFY(!foreignSystemSettingsFound); + + // The matched texts will contain much more because of the generic search term. Make sure our settings win. 
+ QCOMPARE(texts.at(0), u"System Settings ServiceRunnerTest"_s); } void ServiceRunnerTest::testCategories() @@ -172,10 +170,6 @@ void ServiceRunnerTest::testCategories() QVERIFY(std::none_of(matches.cbegin(), matches.cend(), [](const KRunner::QueryMatch &match) { return match.text() == QLatin1String("Konsole ServiceRunnerTest"); })); - - // Query too short to match any category - matches = launchQuery(QStringLiteral("Dumm")); - QVERIFY(matches.isEmpty()); } void ServiceRunnerTest::testJumpListActions() @@ -234,6 +228,68 @@ void ServiceRunnerTest::testEnv() })); } +void ServiceRunnerTest::testDisassociation() +{ + // This test makes sure that we do not associate a service with a query that is not relevant. + auto matches = launchQueryAndSort(u"new laptop com"_s); // particularly notorious because it has two three letter words; 'com' is an incomplete word + + QStringList texts; + for (const auto &match : matches) { + texts.push_back(match.text()); + } + + QCOMPARE(texts, QStringList()); +} + +void ServiceRunnerTest::testMultipleKeywords() +{ + auto matches = launchQueryAndSort(u"text editor programming"_s); + + QStringList texts; + for (const auto &match : matches) { + texts.push_back(match.text()); + } + + QCOMPARE(texts, + QStringList({ + u"Kate ServiceRunnerTest"_s, + })); +} + +void ServiceRunnerTest::testMultipleNameWords() +{ + auto matches = launchQueryAndSort(u"system settings"_s); + + QStringList texts; + for (const auto &match : matches) { + if (!match.text().contains("ServiceRunnerTest"_L1)) { + continue; + } + texts.push_back(match.text()); + } + + QCOMPARE(texts, + QStringList({ + u"System Settings ServiceRunnerTest"_s, + })); +} + +void ServiceRunnerTest::testDiscover() +{ + auto matches = launchQueryAndSort(u"disco"_s); + + QStringList texts; + for (const auto &match : matches) { + texts.push_back(match.text()); + } + + qDebug() << texts; + QCOMPARE(texts, + QStringList({ + u"Discover ServiceRunnerTest"_s, + })); +} + QTEST_MAIN(ServiceRunnerTest) 
#include "servicerunnertest.moc" diff --git a/runners/services/bitap.h b/runners/services/bitap.h new file mode 100644 index 0000000000..a6aedb7eaf --- /dev/null +++ b/runners/services/bitap.h @@ -0,0 +1,178 @@ +// SPDX-License-Identifier: LGPL-2.1-only OR LGPL-3.0-only OR LicenseRef-KDE-Accepted-LGPL +// SPDX-FileCopyrightText: 2025 Harald Sitter + +#pragma once + +#include +#include + +#include +#include +#include + +namespace Bitap +{ + +Q_DECLARE_LOGGING_CATEGORY(BITAP) +Q_LOGGING_CATEGORY(BITAP, "org.kde.plasma.runner.services.bitap", QtWarningMsg) + +struct Match { + qsizetype end; + qsizetype distance; + + bool operator==(const Match &other) const = default; +}; + +inline QDebug operator<<(QDebug dbg, const Bitap::Match &match) +{ + dbg.nospace() << "Bitap::Match(" << match.end << ", " << match.distance << ")"; + return dbg; +} + +// Bitap is a bit of a complicated algorithm thanks to bitwise operations. I've opted to replace them with bitsets for readability. +// It creates a patternMask based on all characters in the pattern. Basically each character gets assigned a representative bit. +// e.g. in the pattern 'abc' the character 'a' would be 110, 'b' 101, 'c' 011. +// This is a bit expensive up front but allows it to carry out everything else using bitwise operations. +// For each match we set a matching bit in the bits vector. +// Matching happens within a hamming distance, meaning up to `hammingDistance` characters can be out of place. 
+inline std::optional bitap(const QStringView &name, const QStringView &pattern, int hammingDistance) +{ + qCDebug(BITAP) << "Bitap called with name:" << name << "and pattern:" << pattern << "with hamming distance:" << hammingDistance; + const auto patternEndIndex = pattern.size() - 1; + if (name == pattern) { + return Match{.end = patternEndIndex, .distance = 0}; // Perfect match + } + + if (pattern.isEmpty() || name.isEmpty()) { + return std::nullopt; + } + + // Being a bitset we could have any number of bits, but practically we probably don't need more than 64, most bitaps I've seen even use 32. + constexpr auto maxMaskBits = 64; + using Mask = std::bitset; + using PatternMask = std::array::max()>; + + // The way bitap works is that each bit of the Mask represents a character position. Because of this we cannot match + // more characters than we have bits for. + // -1 because one bit is used for the result (I think) + if (pattern.size() >= qsizetype(Mask().size()) - 1) { + qCWarning(BITAP) << "Pattern is too long for bitap algorithm, max length is" << Mask().size() - 1; + return std::nullopt; + } + + const PatternMask patternMask = [&pattern, &name] { + PatternMask patternMask; + // The following is an optimized version of patternMask.fill(Mask().set()); to set all **necessary** bits to 1. 
+ for (const auto &qchar : pattern) { + patternMask.at(qchar.unicode()).set(); + } + for (const auto &qchar : name) { + patternMask.at(qchar.unicode()).set(); + } + + for (int i = 0; i < pattern.size(); ++i) { + const auto char_ = pattern.at(i).unicode(); + patternMask.at(char_).reset(i); // unset the relevant index bits + } + + if (BITAP().isDebugEnabled()) { + for (const auto &i : pattern) { + const auto char_ = i.unicode(); + qCDebug(BITAP) << "Pattern mask for" << char_ << "is" << patternMask.at(char_).to_string(); + } + } + + return patternMask; + }(); + + Match match{ + .end = -1, // -1 means no match found for convenience + .distance = name.size(), + }; + + std::vector bits((hammingDistance + 1), Mask().set().reset(0)); + std::vector transpositions(bits.cbegin(), bits.cend()); + for (int i = 0; i < name.size(); ++i) { + const auto &char_ = name.at(i); + auto previousBit = bits[0]; + const auto mask = patternMask.at(char_.unicode()); + bits[0] |= mask; + bits[0] <<= 1; + + for (int j = 1; j <= hammingDistance; ++j) { + auto bit = bits[j]; + auto current = (bit | mask) << 1; + // https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance + auto substitute = previousBit << 1; + auto delete_ = bits[j - 1] << 1; + auto insert = previousBit; + auto transpose = (transpositions[j - 1] | (mask << 1)) << 1; + bits[j] = current & substitute & transpose & delete_ & insert; + transpositions[j - 1] = (previousBit << 1) | mask; + previousBit = bit; + } + + if (BITAP().isDebugEnabled()) { + qCDebug(BITAP) << "After processing character" << char_ << "at index" << i; + for (const auto &bit : bits) { + qCDebug(BITAP) << "bit" << bit.to_string(); + } + } + + for (int k = 0; k <= hammingDistance; ++k) { + // If the bit at the end of the mask is 0, it means we have a match. 
+ if (0 == (bits[k] & Mask().set(pattern.size()))) { + if (k < match.distance && match.end < i) { + qCDebug(BITAP) << "Match found at index" << i << "with hamming distance" << k << "better than previous match with distance" + << match.distance << "at index" << match.end; + match = { + .end = i, + .distance = k, + }; + } + // We do not return early because we want to find the best match, not just any. + // e.g. with a maximum distance of 1 `disc` could match `disc` either at index two with distance one, or at index three with distance zero. + } + } + } + + // Because we use a complete Damerau–Levenshtein distance the return value is a bit complicated. The trick is that the distance incurs a negative penalty + // in relation to the max distance. While an end that is closer to the real end is generally favorably. Combining the two into a single value + // would complicate the meaning of the return value to mean "approximate end with random penalty". This is garbage to reason about so instead we return + // both values and then assign them meaning in the score function. + if (match.end != -1) { + return match; + } + + qCDebug(BITAP) << "No match found for pattern" << pattern << "in name" << name; + return std::nullopt; +} + +inline qreal score(const QStringView &name, const auto &match, auto hammingDistance) +{ + // Normalize the score to a value between 0.0 and 1.0 + // No distance means the score is directly correlated to the end index. The more characters matched the higher the score. + // Any distance will lower the score by a sub 0.1 margin. + + if (name.size() == 0) { + return 0.0; // No name, no score. 
+    }
+
+    const auto maxEnd = name.size() - 1;
+    const auto penalty = [&] {
+        if (hammingDistance <= 0) {
+            return match.distance > 0 ? 1.0 : 0.0; // Budget of 0: an exact match is not penalized, any distance is fully penalized
+        }
+        constexpr auto tenth = 10.0;
+        constexpr auto half = 2.0;
+        return qreal(match.distance) / qreal(hammingDistance) / tenth / half;
+    }();
+    auto score = maxEnd > 0 ? qreal(match.end) / qreal(maxEnd) : 1.0; // One character name: maxEnd is 0, avoid 0/0 (NaN)
+    // Prevent underflows when the penalty is larger than the score.
+    score = std::max(0.0, score - penalty);
+
+    Q_ASSERT(score >= 0.0 && score <= 1.0);
+    return score;
+}
+
+} // namespace Bitap
diff --git a/runners/services/levenshtein.h b/runners/services/levenshtein.h
new file mode 100644
index 0000000000..0efb960be3
--- /dev/null
+++ b/runners/services/levenshtein.h
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: LGPL-2.1-only OR LGPL-3.0-only OR LicenseRef-KDE-Accepted-LGPL
+// SPDX-FileCopyrightText: 2025 Harald Sitter
+
+#pragma once
+
+#include
+#include
+
+namespace Levenshtein
+{
+
+inline int distance(const QStringView &name, const QStringView &query) // Edit distance (insert/delete/substitute) between name and query
+{
+    if (name == query) {
+        return 0;
+    }
+
+    std::vector distance0(query.size() + 1, 0); // Previous row of the distance matrix
+    std::vector distance1(query.size() + 1, 0); // Row currently being computed
+
+    for (int i = 0; i <= query.size(); ++i) {
+        distance0[i] = i;
+    }
+
+    for (int i = 0; i < name.size(); ++i) {
+        distance1[0] = i + 1;
+        for (int j = 0; j < query.size(); ++j) {
+            const auto deletionCost = distance0[j + 1] + 1;
+            const auto insertionCost = distance1[j] + 1;
+            const auto substitutionCost = [&] {
+                if (name[i] == query[j]) {
+                    return distance0[j];
+                }
+                return distance0[j] + 1;
+            }();
+            distance1[j + 1] = std::min({deletionCost, insertionCost, substitutionCost});
+        }
+        std::swap(distance0, distance1); // The finished row becomes the previous row for the next character
+    }
+    return distance0[query.size()];
+}
+
+inline qreal score(const QStringView &name, int distance)
+{
+    // Normalize the distance to a value between 0.0 and 1.0
+    // The maximum distance is the length of the pattern.
+    // If the distance is 0, it means a perfect match, so we return 1.0.
+ // If the distance is equal to the length of the pattern, we return 0.0. + if (distance == 0) { + return 1.0; + } + if (distance >= name.size()) { + return 0.0; + } + return 1.0 - (qreal(distance) / qreal(name.size())); +} + +} // namespace Levenshtein diff --git a/runners/services/servicerunner.cpp b/runners/services/servicerunner.cpp index eb9f02e74b..3d5de8feb2 100644 --- a/runners/services/servicerunner.cpp +++ b/runners/services/servicerunner.cpp @@ -1,7 +1,7 @@ /* SPDX-FileCopyrightText: 2006 Aaron Seigo SPDX-FileCopyrightText: 2014 Vishesh Handa - SPDX-FileCopyrightText: 2016-2020 Harald Sitter + SPDX-FileCopyrightText: 2016-2025 Harald Sitter SPDX-FileCopyrightText: 2022-2023 Alexander Lohnau SPDX-License-Identifier: LGPL-2.0-only @@ -21,6 +21,7 @@ #include #include +#include #include #include #include @@ -35,22 +36,130 @@ #include #include +#include "bitap.h" #include "debug.h" +#include "levenshtein.h" using namespace Qt::StringLiterals; namespace { -int weightedLength(const QString &query) +struct Score { + qreal value = 0.0; // The final score, it is the sum of all scores. + KRunner::QueryMatch::CategoryRelevance categoryRelevance = KRunner::QueryMatch::CategoryRelevance::Lowest; // The category relevance of the match. 
+}; + +struct ScoreCard { + Bitap::Match bitap; + qreal bitapScore; + int levenshtein; + qreal levenshteinScore; +}; + +QDebug operator<<(QDebug dbg, const ScoreCard &card) { - return KStringHandler::logicalLength(query); + dbg.nospace() << "Scorecard(" << "bitap: " << card.bitap << ", bitapScore: " << card.bitapScore << ", levenshtein: " << card.levenshtein + << ", levenshteinScore: " << card.levenshteinScore << ")"; + return dbg; } -inline bool contains(const QString &result, const QList &queryList) +using ScoreCards = std::vector; + +struct WeightedScoreCard { + ScoreCards cards; + qreal weight; +}; + +QDebug operator<<(QDebug dbg, const WeightedScoreCard &card) { - return std::ranges::all_of(queryList, [&result](QStringView query) { - return result.contains(query, Qt::CaseInsensitive); - }); + + dbg.nospace() << "WeightedCard["; + for (const auto &scoreCard : card.cards) { + dbg.nospace() << scoreCard; + if (&scoreCard != &card.cards.back()) { + dbg.nospace() << ", "; + } + } + dbg.nospace() << "]"; + return dbg; +} + +auto makeScores(const auto ¬NormalizedString, const auto &queryList) { + if (notNormalizedString.isEmpty()) { + return ScoreCards{}; // No string, no score. + } + + const auto string = notNormalizedString.toLower(); + + ScoreCards cards; + for (const auto &queryItem : queryList) { + constexpr auto maxDistance = 1; + const auto bitap = Bitap::bitap(string, queryItem, maxDistance); + if (!bitap) { + // One of the query items didn't match. This means the entire query is not a match + return ScoreCards{}; + } + + const auto bitapScore = Bitap::score(string, bitap.value(), maxDistance); + + // Mind that we give different levels of bonus. This is important to imply ordering within competing matches of the same "type". + // If we perfectly match that gives a bonus for not requiring any changes. + const auto noSubstitionBonus = Bitap::score(string, bitap.value(), 0) == 1.0 ? 
4.0 : 1.0; + // If we match the entire length of the string that gets a bonus (disregarding distance, that was considered above). + const auto completeMatchBonus = bitap->end >= (queryItem.size() - 1) ? 3.0 : 1.0; + // If the string starts with the query item that gets a bonus. + const auto startsWithBonus = (string.startsWith(queryItem, Qt::CaseInsensitive)) ? 2.0 : 1.0; + + // Also consider the distance between the input and the query item. + // If one is "yolotrollingservice" and the other is "yolo" then we must consider them worse matches than say "yolotroll". + const auto levenshtein = Levenshtein::distance(string, queryItem); + + cards.emplace_back(ScoreCard{ + .bitap = *bitap, + .bitapScore = bitapScore + completeMatchBonus + noSubstitionBonus + startsWithBonus, + .levenshtein = levenshtein, + .levenshteinScore = Levenshtein::score(string, levenshtein), + }); + } + + return cards; +}; + + +auto makeScoreFromList(const auto &queryList, const QStringList &strings) { + // This turns the loop inside out. For every query item we must find a match in our keywords or we discard + ScoreCards cards; + // e.g. text,editor,programming + for (const auto &queryItem : queryList) { + // e.g. text;txt;editor;programming;programmer;development;developer;code; + auto found = false; + ScoreCards queryCards; + for (const auto &string : strings) { + auto stringCards = makeScores(string, QList{queryItem}); + if (stringCards.empty()) { + continue; // The combination didn't match. + } + for (auto &scoreCard : stringCards) { + if (scoreCard.levenshteinScore < 0.8) { + continue; // Not a good match, skip it. We are very strict with keywords + } + found = true; + queryCards.append_range(stringCards); + } + // We do not break because other string might also match, improving the score. + } + if (!found) { + // No item in strings matched the query item. This means the entire query is not a match. 
+ return ScoreCards{}; + } + cards.append_range(queryCards); + } + return cards; +}; + +int weightedLength(const QString &query) +{ + return KStringHandler::logicalLength(query); } inline bool contains(const QStringList &results, const QList &queryList) @@ -79,7 +188,7 @@ public: void match(KRunner::RunnerContext &context) { - query = context.query(); + query = context.query().toLower(); // Splitting the query term to match using subsequences queryList = QStringView(query).split(QLatin1Char(' ')); weightedTermLength = weightedLength(query); @@ -120,36 +229,6 @@ private: return ret; } - enum class Category { - Name, - GenericName, - Comment, - }; - qreal increaseMatchRelevance(const QString &serviceProperty, const QList &strList, Category category) - { - // Increment the relevance based on all the words (other than the first) of the query list - qreal relevanceIncrement = 0; - - for (int i = 1; i < strList.size(); ++i) { - const auto &str = strList.at(i); - if (category == Category::Name) { - if (serviceProperty.contains(str, Qt::CaseInsensitive)) { - relevanceIncrement += 0.01; - } - } else if (category == Category::GenericName) { - if (serviceProperty.contains(str, Qt::CaseInsensitive)) { - relevanceIncrement += 0.01; - } - } else if (category == Category::Comment) { - if (serviceProperty.contains(str, Qt::CaseInsensitive)) { - relevanceIncrement += 0.01; - } - } - } - - return relevanceIncrement; - } - void setupMatch(const KService::Ptr &service, KRunner::QueryMatch &match) { const QString name = service->name(); @@ -219,96 +298,77 @@ private: return resultingArgs.join(QLatin1Char(' ')); } - void matchNameKeywordAndGenericName() + [[nodiscard]] std::optional fuzzyScore(KService::Ptr service) { - const auto nameKeywordAndGenericNameFilter = [this](const KService::Ptr &service) { - // Name - if (contains(service->name(), queryList)) { - return true; - } - // If the term length is < 3, no real point searching the untranslated Name, Keywords and GenericName - if 
(weightedTermLength < 3) { - return false; - } - if (contains(service->untranslatedName(), queryList)) { - return true; - } + if (queryList.isEmpty()) { + return std::nullopt; // No query, no score. + } + + const auto name = service->name(); + if (name.compare(query, Qt::CaseInsensitive) == 0) { + // Absolute match. Can't get any better than this. + return Score{.value = std::numeric_limits::max(), .categoryRelevance = KRunner::QueryMatch::CategoryRelevance::Highest}; + } - // Keywords - if (contains(service->keywords(), queryList)) { - return true; + std::array weightedCards = { + WeightedScoreCard{.cards = makeScores(name, queryList), .weight = 1.0}, + WeightedScoreCard{.cards = makeScores(service->untranslatedName(), queryList), .weight = 0.8}, + WeightedScoreCard{.cards = makeScores(service->genericName(), queryList), .weight = 0.6}, + WeightedScoreCard{.cards = makeScoreFromList(queryList, service->keywords()), .weight = 0.1}, + }; + + if (RUNNER_SERVICES().isDebugEnabled()) { + qCDebug(RUNNER_SERVICES) << "+++++++ Weighted Cards for" << name; + for (const auto &weightedCard : weightedCards) { + qCDebug(RUNNER_SERVICES) << weightedCard; } - // GenericName - if (contains(service->genericName(), queryList) || contains(service->untranslatedGenericName(), queryList)) { - return true; + qCDebug(RUNNER_SERVICES) << "-------"; + } + + int scores = 1; // starts at 1 to avoid division by zero + qreal finalScore = 0.0; + for (const auto &weightedCard : weightedCards) { + if (weightedCard.cards.empty()) { + continue; // No scores, no match. 
} - // Comment - if (contains(service->comment(), queryList)) { - return true; + + qreal weightedScore = 0.0; + for (const auto &scoreCard : weightedCard.cards) { + weightedScore += (scoreCard.bitapScore + scoreCard.levenshteinScore) * weightedCard.weight; + scores++; } - return false; - }; + finalScore += weightedScore; + } + finalScore = finalScore / scores; // Average the score for this card - for (const KService::Ptr &service : m_services) { - if (!nameKeywordAndGenericNameFilter(service) || disqualify(service)) { - continue; - } + qCDebug(RUNNER_SERVICES) << "Final score for" << name << "is" << finalScore; + if (finalScore > 0.0) { + return Score{.value = finalScore, .categoryRelevance = KRunner::QueryMatch::CategoryRelevance::Moderate}; + } - const QString id = service->storageId(); - const QString name = service->name(); + return std::nullopt; + } - KRunner::QueryMatch::CategoryRelevance categoryRelevance = KRunner::QueryMatch::CategoryRelevance::Moderate; - qreal relevance(0.6); + void matchNameKeywordAndGenericName() + { + static auto isTest = QStandardPaths::isTestModeEnabled(); - // If the term was < 3 chars and NOT at the beginning of the App's name, then chances are the user doesn't want that app - if (weightedTermLength < 3) { - if (name.startsWith(query, Qt::CaseInsensitive)) { - relevance = 0.9; - } else { - continue; - } - } else if (name.compare(query, Qt::CaseInsensitive) == 0) { - relevance = 1; - categoryRelevance = KRunner::QueryMatch::CategoryRelevance::Highest; - } else if (const auto idx = name.indexOf(queryList[0], 0, Qt::CaseInsensitive); idx != -1) { - relevance = 0.8; - relevance += increaseMatchRelevance(name, queryList, Category::Name); - if (idx == 0) { - relevance += 0.1; - categoryRelevance = KRunner::QueryMatch::CategoryRelevance::High; - } - } else if (const auto idx = service->genericName().indexOf(queryList[0], 0, Qt::CaseInsensitive); idx != -1) { - relevance = 0.65; - relevance += 
increaseMatchRelevance(service->genericName(), queryList, Category::GenericName); - if (idx == 0) { - relevance += 0.05; - } - } else if (const auto idx = service->comment().indexOf(queryList[0], 0, Qt::CaseInsensitive); idx != -1) { - relevance = 0.5; - relevance += increaseMatchRelevance(service->comment(), queryList, Category::Comment); - if (idx == 0) { - relevance += 0.05; - } + for (const KService::Ptr &service : m_services) { + if (isTest && !service->name().contains("ServiceRunnerTest"_L1)) { + continue; // Skip services that are not part of the test. } KRunner::QueryMatch match(m_runner); - match.setCategoryRelevance(categoryRelevance); - setupMatch(service, match); - if (service->categories().contains(QLatin1String("KDE"))) { - qCDebug(RUNNER_SERVICES) << "found a kde thing" << id << match.subtext() << relevance; - relevance += .09; - } - - if (const auto foundIt = m_runner->m_favorites.constFind(service->desktopEntryName()); foundIt != m_runner->m_favorites.cend()) { - if (foundIt->isGlobal || foundIt->linkedActivities.contains(m_currentActivity)) { - qCDebug(RUNNER_SERVICES) << "entry is a favorite" << id << match.subtext() << relevance; - relevance *= 1.25; // Give favorites a relative boost, - } + auto score = fuzzyScore(service); + if (!score || disqualify(service)) { + continue; } - qCDebug(RUNNER_SERVICES) << name << "is this relevant:" << relevance; - match.setRelevance(relevance); + setupMatch(service, match); + match.setCategoryRelevance(score->categoryRelevance); + match.setRelevance(score->value); + qCDebug(RUNNER_SERVICES) << match.text() << "is this relevant:" << match.relevance() << "category relevance" << match.categoryRelevance(); matches << match; } -- 2.51.0