From 9ab7593321d014ff63ef12590a0c2d0e721a90f1 Mon Sep 17 00:00:00 2001 From: Jhair Paris Date: Sat, 7 Jun 2025 19:56:28 -0500 Subject: [PATCH 1/3] Add OCR dependencies and build configuration - Add Tesseract and Leptonica dependencies - Configure OCR support in CMake build system --- CMakeLists.txt | 7 +++++++ src/CMakeLists.txt | 5 ++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f62a38443..3038f472c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -89,12 +89,19 @@ find_package(PlasmaWaylandProtocols REQUIRED) find_package(LayerShellQt REQUIRED) find_package(KPipeWire) find_package(OpenCV 4.7 REQUIRED core imgproc) +find_package(PkgConfig REQUIRED) +pkg_check_modules(TESSERACT REQUIRED tesseract) set_package_properties(KPipeWire PROPERTIES DESCRIPTION "Used to record pipewire streams into a file" TYPE REQUIRED ) +set_package_properties(TESSERACT PROPERTIES DESCRIPTION + "OCR (Optical Character Recognition) engine for text recognition in images" + TYPE REQUIRED +) + # optional components find_package(KF6DocTools ${KF6_MIN_VERSION}) set_package_properties(KF6DocTools PROPERTIES DESCRIPTION diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d27c2dba4..cb000b35d 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -25,6 +25,7 @@ target_sources(spectacle PRIVATE CommandLineOptions.cpp ExportManager.cpp Geometry.cpp + OcrManager.cpp Gui/Annotations/AnnotationDocument.cpp Gui/Annotations/AnnotationTool.cpp Gui/Annotations/AnnotationViewport.cpp @@ -104,7 +105,7 @@ ki18n_wrap_ui(spectacle # Needed to compile with OpenCV target_compile_options (spectacle PRIVATE -fexceptions) -target_include_directories(spectacle PUBLIC ${OpenCV_INCLUDE_DIRS}) +target_include_directories(spectacle PUBLIC ${OpenCV_INCLUDE_DIRS} ${TESSERACT_INCLUDE_DIRS}) target_link_libraries(spectacle PRIVATE Qt::Concurrent @@ -135,6 +136,7 @@ target_link_libraries(spectacle PRIVATE Wayland::Client LayerShellQt::Interface 
${OpenCV_LIBRARIES} + ${TESSERACT_LIBRARIES} ) # qt_add_qml_module doesn't know how to deal with headers in subdirectories so @@ -180,6 +182,7 @@ qt_target_qml_sources(spectacle Gui/InlineMessageList.qml Gui/Magnifier.qml Gui/NewScreenshotToolButton.qml + Gui/OcrAction.qml Gui/OptionsMenuButton.qml Gui/Outline.qml Gui/QmlUtils.qml -- GitLab From ae7a749c89892c8f0d5494c2d7157970578b8b3f Mon Sep 17 00:00:00 2001 From: Jhair Paris Date: Sat, 7 Jun 2025 19:57:10 -0500 Subject: [PATCH 2/3] Implement OcrManager class for text recognition - Add OcrManager class with Tesseract integration - Provide async OCR processing methods - Handle OCR initialization and cleanup Add OCR language selection to General Options - Introduced a new combo box for selecting the OCR language in the settings dialog. - Implemented methods to populate and refresh the OCR language options based on availability. Add OCR action and integrate into UI toolbars - Create OcrAction.qml for text recognition functionality - Add OCR buttons to CaptureOverlay toolbars - Add OCR button to ViewerPage main toolbar Integrate OCR functionality into capture and viewer windows Add OCR notifications and core integration - Add OCR success/error notification events to notifyrc - Integrate OCR manager in SpectacleCore Remove manual translations Implement OCR availability checks Enhance OcrManager to load Tesseract library dynamically and check its availability. Show info cursor on OCR tooltip icon in settings Refactor OCR language name handling using QLocale - Replace hardcoded/translatable language name map with dynamic lookup via QLocale and scriptToString. 
Refactor Tesseract initialization to support dynamic language detection Detect Tesseract and language packs at configure time; link directly to libtesseract - Add tesseract_test.cpp using TessBaseAPI::GetAvailableLanguagesAsVector() - CMake: pkg_check_modules(TESSERACT) + try_run() to check usable langpacks - Define HAVE_TESSERACT_OCR when successful; otherwise warn and disable OCR - OCR: refactor OcrManager to use tesseract::TessBaseAPI (C++ API) - Remove QLibrary-based dynamic loading and manual symbol resolution - Wrap OCR code with #ifdef HAVE_TESSERACT_OCR and provide graceful fallbacks Refactor OCR text recognition to use ResultIterator for improved accuracy Refactor the OCR core: centralize extraction in SpectacleCore, remove direct OCR handling from windows. --- CMakeLists.txt | 48 +- cmake/tesseract_test.cpp | 40 + desktop/spectacle.notifyrc | 5 + src/CMakeLists.txt | 9 +- src/Config.h.in | 3 + src/Gui/CaptureOverlay.qml | 9 + src/Gui/CaptureWindow.cpp | 4 +- src/Gui/OcrAction.qml | 14 + src/Gui/SettingsDialog/GeneralOptions.ui | 127 ++++ src/Gui/SettingsDialog/GeneralOptionsPage.cpp | 80 ++ src/Gui/SettingsDialog/GeneralOptionsPage.h | 4 + src/Gui/SettingsDialog/SettingsDialog.cpp | 5 + src/Gui/SettingsDialog/spectacle.kcfg | 4 + src/Gui/ViewerPage.qml | 5 + src/Gui/ViewerWindow.cpp | 2 +- src/OcrManager.cpp | 716 ++++++++++++++++++ src/OcrManager.h | 175 +++++ src/SpectacleCore.cpp | 143 ++++ src/SpectacleCore.h | 9 + 19 files changed, 1393 insertions(+), 9 deletions(-) create mode 100644 cmake/tesseract_test.cpp create mode 100644 src/Gui/OcrAction.qml create mode 100644 src/OcrManager.cpp create mode 100644 src/OcrManager.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 3038f472c..9b3c47fbe 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -92,16 +92,54 @@ find_package(OpenCV 4.7 REQUIRED core imgproc) find_package(PkgConfig REQUIRED) pkg_check_modules(TESSERACT REQUIRED tesseract) +# Find Tesseract for OCR functionality +find_package(PkgConfig 
QUIET) +if(PkgConfig_FOUND) + pkg_check_modules(TESSERACT tesseract) + + if(TESSERACT_FOUND) + # Test if Tesseract has usable language packs + try_run( + TESSERACT_TEST_RUN_RESULT + TESSERACT_TEST_COMPILE_RESULT + ${CMAKE_CURRENT_BINARY_DIR} + ${CMAKE_CURRENT_SOURCE_DIR}/cmake/tesseract_test.cpp + LINK_LIBRARIES ${TESSERACT_LIBRARIES} + CMAKE_FLAGS "-DINCLUDE_DIRECTORIES=${TESSERACT_INCLUDE_DIRS}" + COMPILE_OUTPUT_VARIABLE TESSERACT_COMPILE_OUTPUT + RUN_OUTPUT_VARIABLE TESSERACT_RUN_OUTPUT + ) + + if(TESSERACT_TEST_COMPILE_RESULT AND TESSERACT_TEST_RUN_RESULT EQUAL 0) + message(STATUS "Tesseract OCR support enabled") + message(STATUS "${TESSERACT_RUN_OUTPUT}") + set(HAVE_TESSERACT_OCR TRUE) + else() + message(WARNING "Tesseract library found but no usable language packs detected") + message(WARNING "${TESSERACT_RUN_OUTPUT}") + message(WARNING "OCR functionality will be disabled. Install language data packages (e.g., tesseract-ocr-eng)") + set(HAVE_TESSERACT_OCR FALSE) + endif() + else() + message(STATUS "Tesseract not found - OCR functionality disabled") + set(HAVE_TESSERACT_OCR FALSE) + endif() + + set_package_properties(TESSERACT PROPERTIES + DESCRIPTION "OCR engine for text recognition in screenshots" + TYPE OPTIONAL + PURPOSE "Enables optical character recognition functionality" + ) +else() + message(STATUS "PkgConfig not found - Tesseract detection disabled") + set(HAVE_TESSERACT_OCR FALSE) +endif() + set_package_properties(KPipeWire PROPERTIES DESCRIPTION "Used to record pipewire streams into a file" TYPE REQUIRED ) -set_package_properties(TESSERACT PROPERTIES DESCRIPTION - "OCR (Optical Character Recognition) engine for text recognition in images" - TYPE REQUIRED -) - # optional components find_package(KF6DocTools ${KF6_MIN_VERSION}) set_package_properties(KF6DocTools PROPERTIES DESCRIPTION diff --git a/cmake/tesseract_test.cpp b/cmake/tesseract_test.cpp new file mode 100644 index 000000000..4ebae9779 --- /dev/null +++ b/cmake/tesseract_test.cpp @@ -0,0 
+1,40 @@ +#include +#include +#include +#include + +int main() +{ + tesseract::TessBaseAPI api; + + if (api.Init(nullptr, nullptr) != 0) { + std::cerr << "Failed to initialize Tesseract" << std::endl; + return 1; + } + + std::vector languages; + api.GetAvailableLanguagesAsVector(&languages); + + // Filter out 'osd' as it's not a usable language for OCR + std::vector usableLanguages; + for (const auto &lang : languages) { + if (lang != "osd") { + usableLanguages.push_back(lang); + } + } + + if (usableLanguages.empty()) { + std::cerr << "No usable Tesseract language packs found. Install language data files (e.g., tesseract-ocr-eng)" << std::endl; + return 1; + } + + std::cout << "Found " << usableLanguages.size() << " Tesseract language pack(s): "; + for (size_t i = 0; i < usableLanguages.size(); ++i) { + std::cout << usableLanguages[i]; + if (i < usableLanguages.size() - 1) + std::cout << ", "; + } + std::cout << std::endl; + + return 0; +} diff --git a/desktop/spectacle.notifyrc b/desktop/spectacle.notifyrc index 5c4166f0b..f3f65f679 100644 --- a/desktop/spectacle.notifyrc +++ b/desktop/spectacle.notifyrc @@ -306,3 +306,8 @@ Comment[uk]=Було створено і збережено новий запи Comment[zh_CN]=已录制并保存新的屏幕录像 Comment[zh_TW]=新的螢幕錄製已擷取並儲存 Action=Popup + +[Event/ocrTextExtracted] +Name=Text Extracted +Comment=Text has been extracted from image using OCR +Action=Popup diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index cb000b35d..c57535e34 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -105,7 +105,7 @@ ki18n_wrap_ui(spectacle # Needed to compile with OpenCV target_compile_options (spectacle PRIVATE -fexceptions) -target_include_directories(spectacle PUBLIC ${OpenCV_INCLUDE_DIRS} ${TESSERACT_INCLUDE_DIRS}) +target_include_directories(spectacle PUBLIC ${OpenCV_INCLUDE_DIRS}) target_link_libraries(spectacle PRIVATE Qt::Concurrent @@ -136,9 +136,14 @@ target_link_libraries(spectacle PRIVATE Wayland::Client LayerShellQt::Interface ${OpenCV_LIBRARIES} - 
${TESSERACT_LIBRARIES} ) +# Link against Tesseract when OCR support is enabled +if(HAVE_TESSERACT_OCR) + target_include_directories(spectacle PRIVATE ${TESSERACT_INCLUDE_DIRS}) + target_link_libraries(spectacle PRIVATE ${TESSERACT_LIBRARIES}) +endif() + # qt_add_qml_module doesn't know how to deal with headers in subdirectories so # make sure to add those so the headers can be found. target_include_directories(spectacle PRIVATE diff --git a/src/Config.h.in b/src/Config.h.in index 15313542a..aadb22252 100644 --- a/src/Config.h.in +++ b/src/Config.h.in @@ -7,6 +7,9 @@ /* Define to 1 if we have Purpose */ #cmakedefine PURPOSE_FOUND 1 +/* Define to 1 if we have Tesseract OCR */ +#cmakedefine HAVE_TESSERACT_OCR 1 + /* Set the Spectacle version from CMake */ #cmakedefine SPECTACLE_VERSION "@SPECTACLE_VERSION@" diff --git a/src/Gui/CaptureOverlay.qml b/src/Gui/CaptureOverlay.qml index d9ca9a11c..37f3dcf85 100644 --- a/src/Gui/CaptureOverlay.qml +++ b/src/Gui/CaptureOverlay.qml @@ -506,6 +506,11 @@ MouseArea { visible: action.enabled action: CopyImageAction {} } + ToolButton { + display: TtToolButton.IconOnly + visible: action.enabled && !SpectacleCore.videoMode && SpectacleCore.ocrAvailable + action: OcrAction {} + } ExportMenuButton { focusPolicy: Qt.NoFocus } @@ -532,6 +537,10 @@ MouseArea { visible: action.enabled action: CopyImageAction {} } + ToolButton { + visible: action.enabled && !SpectacleCore.videoMode && SpectacleCore.ocrAvailable + action: OcrAction {} + } ExportMenuButton { focusPolicy: Qt.NoFocus } diff --git a/src/Gui/CaptureWindow.cpp b/src/Gui/CaptureWindow.cpp index fc4509cf3..cb8ce97ab 100644 --- a/src/Gui/CaptureWindow.cpp +++ b/src/Gui/CaptureWindow.cpp @@ -8,11 +8,13 @@ #include "CaptureWindow.h" #include "Config.h" -#include "SpectacleCore.h" #include "Gui/SelectionEditor.h" +#include "SpectacleCore.h" #include #include +#include +#include using namespace Qt::StringLiterals; diff --git a/src/Gui/OcrAction.qml b/src/Gui/OcrAction.qml new file mode 
100644 index 000000000..f887ec0ee --- /dev/null +++ b/src/Gui/OcrAction.qml @@ -0,0 +1,14 @@ +/* SPDX-FileCopyrightText: 2025 Jhair Paris + * SPDX-License-Identifier: LGPL-2.0-or-later + */ + +import QtQuick.Templates as T +import org.kde.spectacle.private + +T.Action { + // OCR is only available for screenshots, not videos, and only when OCR is properly available + enabled: !SpectacleCore.videoMode && SpectacleCore.ocrAvailable + icon.name: "document-scan" + text: i18nc("@action", "Extract Text") + onTriggered: contextWindow.extractText() +} diff --git a/src/Gui/SettingsDialog/GeneralOptions.ui b/src/Gui/SettingsDialog/GeneralOptions.ui index 1d99e9a33..ddbbf3e5a 100644 --- a/src/Gui/SettingsDialog/GeneralOptions.ui +++ b/src/Gui/SettingsDialog/GeneralOptions.ui @@ -239,6 +239,132 @@ + + + + Qt::Vertical + + + QSizePolicy::Fixed + + + + 10 + 10 + + + + + + + + Text Recognition (OCR) + + + + + + + Language: + + + + + + + + 0 + 0 + + + + currentData + + + + + + + false + + + + 0 + 0 + + + + + 0 + + + 6 + + + + + OCR functionality is not available + + + + 0 + 0 + + + + false + + + + + + + Please install the required packages: +• tesseract +• tesseract language data (e.g., tesseract-ocr-eng for English) + + + + 0 + 0 + + + + Qt::AlignCenter + + + + 16 + 16 + + + + + 16 + 16 + + + + true + + + + + + + Qt::Horizontal + + + + 40 + 20 + + + + + + + @@ -257,6 +383,7 @@ kcfg_useReleaseToCapture kcfg_showCaptureInstructions kcfg_rememberSelectionRect + kcfg_ocrLanguage diff --git a/src/Gui/SettingsDialog/GeneralOptionsPage.cpp b/src/Gui/SettingsDialog/GeneralOptionsPage.cpp index fcea6f671..5b8a5d9fc 100644 --- a/src/Gui/SettingsDialog/GeneralOptionsPage.cpp +++ b/src/Gui/SettingsDialog/GeneralOptionsPage.cpp @@ -1,4 +1,5 @@ /* + * SPDX-FileCopyrightText: 2025 Jhair Paris * SPDX-FileCopyrightText: 2019 David Redondo * SPDX-FileCopyrightText: 2015 Boudhayan Gupta * @@ -9,10 +10,13 @@ #include "settings.h" #include "ui_GeneralOptions.h" +#include "OcrManager.h" #include 
+#include #include +#include GeneralOptionsPage::GeneralOptionsPage(QWidget *parent) : QWidget(parent) @@ -20,8 +24,16 @@ GeneralOptionsPage::GeneralOptionsPage(QWidget *parent) { m_ui->setupUi(this); + m_ui->ocrInfoIcon->setPixmap(QIcon::fromTheme(QStringLiteral("help-hint")).pixmap(16, 16)); + m_ui->ocrInfoIcon->setCursor(Qt::WhatsThisCursor); + m_ui->runningTitle->setLevel(2); m_ui->regionTitle->setLevel(2); + m_ui->ocrTitle->setLevel(2); + + setupOcrLanguageComboBox(); + + connect(OcrManager::instance(), &OcrManager::statusChanged, this, &GeneralOptionsPage::refreshOcrLanguageSettings); //On Wayland we can't programmatically raise and focus the window so we have to hide the option if (KWindowSystem::isPlatformWayland() || qstrcmp(qgetenv("XDG_SESSION_TYPE").constData(), "wayland") == 0) { @@ -31,4 +43,72 @@ GeneralOptionsPage::GeneralOptionsPage(QWidget *parent) GeneralOptionsPage::~GeneralOptionsPage() = default; +void GeneralOptionsPage::setupOcrLanguageComboBox() +{ + OcrManager *ocrManager = OcrManager::instance(); + + if (!ocrManager->isAvailable()) { + m_ui->kcfg_ocrLanguage->setEnabled(false); + m_ui->kcfg_ocrLanguage->addItem(i18n("OCR not available")); + m_ui->ocrLanguageLabel->setVisible(false); + m_ui->kcfg_ocrLanguage->setVisible(false); + m_ui->ocrUnavailableWidget->setVisible(true); + return; + } + + const auto availableLanguages = ocrManager->availableLanguagesWithNames(); + + if (availableLanguages.isEmpty()) { + m_ui->kcfg_ocrLanguage->addItem(i18n("No languages found")); + m_ui->kcfg_ocrLanguage->setEnabled(false); + return; + } + + m_ui->kcfg_ocrLanguage->clear(); + m_ui->ocrLanguageLabel->setVisible(true); + m_ui->kcfg_ocrLanguage->setVisible(true); + m_ui->ocrUnavailableWidget->setVisible(false); + + for (auto it = availableLanguages.constBegin(); it != availableLanguages.constEnd(); ++it) { + m_ui->kcfg_ocrLanguage->addItem(it.value(), it.key()); + } +} + +void GeneralOptionsPage::refreshOcrLanguageSettings() +{ + OcrManager *ocrManager = 
OcrManager::instance(); + + if (!ocrManager->isAvailable()) { + m_ui->ocrLanguageLabel->setVisible(false); + m_ui->kcfg_ocrLanguage->setVisible(false); + m_ui->ocrUnavailableWidget->setVisible(true); + return; + } + + const auto availableLanguages = ocrManager->availableLanguagesWithNames(); + + if (availableLanguages.isEmpty()) { + return; + } + + m_ui->kcfg_ocrLanguage->clear(); + m_ui->kcfg_ocrLanguage->setEnabled(true); + m_ui->ocrLanguageLabel->setVisible(true); + m_ui->kcfg_ocrLanguage->setVisible(true); + m_ui->ocrUnavailableWidget->setVisible(false); + + for (auto it = availableLanguages.constBegin(); it != availableLanguages.constEnd(); ++it) { + m_ui->kcfg_ocrLanguage->addItem(it.value(), it.key()); + } + + const QString currentLanguage = Settings::ocrLanguage(); + + for (int i = 0; i < m_ui->kcfg_ocrLanguage->count(); ++i) { + if (m_ui->kcfg_ocrLanguage->itemData(i).toString() == currentLanguage) { + m_ui->kcfg_ocrLanguage->setCurrentIndex(i); + break; + } + } +} + #include "moc_GeneralOptionsPage.cpp" diff --git a/src/Gui/SettingsDialog/GeneralOptionsPage.h b/src/Gui/SettingsDialog/GeneralOptionsPage.h index d8e7c5003..c184d6ba8 100644 --- a/src/Gui/SettingsDialog/GeneralOptionsPage.h +++ b/src/Gui/SettingsDialog/GeneralOptionsPage.h @@ -19,8 +19,12 @@ class GeneralOptionsPage : public QWidget public: explicit GeneralOptionsPage(QWidget *parent = nullptr); ~GeneralOptionsPage() override; + + void refreshOcrLanguageSettings(); private: + void setupOcrLanguageComboBox(); + QScopedPointer m_ui; }; diff --git a/src/Gui/SettingsDialog/SettingsDialog.cpp b/src/Gui/SettingsDialog/SettingsDialog.cpp index a37d8344c..a19a47627 100644 --- a/src/Gui/SettingsDialog/SettingsDialog.cpp +++ b/src/Gui/SettingsDialog/SettingsDialog.cpp @@ -64,6 +64,9 @@ void SettingsDialog::showEvent(QShowEvent *event) auto parent = parentWidget(); bool onTop = parent && parent->windowHandle()->flags().testFlag(Qt::WindowStaysOnTopHint); windowHandle()->setFlag(Qt::WindowStaysOnTopHint, 
onTop); + + m_generalPage->refreshOcrLanguageSettings(); + KConfigDialog::showEvent(event); } @@ -87,6 +90,8 @@ void SettingsDialog::updateWidgets() { KConfigDialog::updateWidgets(); m_shortcutsPage->resetChanges(); + + m_generalPage->refreshOcrLanguageSettings(); } void SettingsDialog::updateWidgetsDefault() diff --git a/src/Gui/SettingsDialog/spectacle.kcfg b/src/Gui/SettingsDialog/spectacle.kcfg index e37b9e5b4..4517e2344 100644 --- a/src/Gui/SettingsDialog/spectacle.kcfg +++ b/src/Gui/SettingsDialog/spectacle.kcfg @@ -70,6 +70,10 @@ UntilClosed + + + eng + diff --git a/src/Gui/ViewerPage.qml b/src/Gui/ViewerPage.qml index 6e77887a8..602e4431b 100644 --- a/src/Gui/ViewerPage.qml +++ b/src/Gui/ViewerPage.qml @@ -61,6 +61,11 @@ EmptyPage { visible: action.enabled action: CopyImageAction {} } + TtToolButton { + display: TtToolButton.IconOnly + visible: action.enabled && SpectacleCore.ocrAvailable + action: OcrAction {} + } // We only show this in video mode to save space in screenshot mode TtToolButton { visible: SpectacleCore.videoMode diff --git a/src/Gui/ViewerWindow.cpp b/src/Gui/ViewerWindow.cpp index 68812495d..8c0d9941f 100644 --- a/src/Gui/ViewerWindow.cpp +++ b/src/Gui/ViewerWindow.cpp @@ -8,9 +8,9 @@ #include "ViewerWindow.h" #include "Config.h" -#include "SpectacleCore.h" #include "Gui/ExportMenu.h" #include "InlineMessageModel.h" +#include "SpectacleCore.h" #include #include diff --git a/src/OcrManager.cpp b/src/OcrManager.cpp new file mode 100644 index 000000000..1d09db8ef --- /dev/null +++ b/src/OcrManager.cpp @@ -0,0 +1,716 @@ +/* This file is part of Spectacle, the KDE screenshot utility + * SPDX-FileCopyrightText: 2025 Jhair Paris + * SPDX-License-Identifier: LGPL-2.0-or-later + */ + +#include "OcrManager.h" +#include "settings.h" +#include "spectacle_debug.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +using namespace Qt::StringLiterals; + +OcrManager 
*OcrManager::s_instance = nullptr; + +OcrManager::OcrManager(QObject *parent) + : QObject(parent) +#ifdef HAVE_TESSERACT_OCR + , m_tesseract(nullptr) + , m_worker(nullptr) +#endif + , m_workerThread(std::make_unique()) + , m_timeoutTimer(new QTimer(this)) + , m_status(OcrStatus::Ready) + , m_currentLanguageCode() // Current language code ("eng+spa") + , m_configuredLanguages() // Languages from Settings (persistent) + , m_activeLanguages() + , m_shouldRestoreToConfigured(false) // Flag to restore after temp language use + , m_initialized(false) +{ +#ifdef HAVE_TESSERACT_OCR + m_timeoutTimer->setSingleShot(true); + m_timeoutTimer->setInterval(30000); + + connect(m_timeoutTimer, &QTimer::timeout, this, [this]() { + qCWarning(SPECTACLE_LOG) << "OCR recognition timed out"; + setStatus(OcrStatus::Error); + }); + + m_worker = new OcrWorker(); + m_worker->moveToThread(m_workerThread.get()); + connect(m_worker, &OcrWorker::imageProcessed, this, &OcrManager::handleRecognitionComplete); + m_workerThread->start(); + + connect(Settings::self(), &Settings::ocrLanguagesChanged, this, [this]() { + const QStringList newLanguages = Settings::ocrLanguages(); + const QString combinedLanguages = newLanguages.join(u"+"_s); + if (combinedLanguages != m_currentLanguageCode) { + setLanguagesByCode(newLanguages); + } + }); + + QTimer::singleShot(0, this, &OcrManager::initializeTesseract); +#endif +} + +OcrManager::~OcrManager() +{ +#ifdef HAVE_TESSERACT_OCR + if (m_worker) { + if (m_workerThread && m_workerThread->isRunning()) { + QMetaObject::invokeMethod(m_worker, &QObject::deleteLater, Qt::QueuedConnection); + } else { + delete m_worker; + } + m_worker = nullptr; + } +#endif + if (m_workerThread && m_workerThread->isRunning()) { + m_workerThread->quit(); + m_workerThread->wait(3000); + } +#ifdef HAVE_TESSERACT_OCR + if (m_tesseract) { + m_tesseract->End(); + delete m_tesseract; + m_tesseract = nullptr; + } +#endif +} + +OcrManager *OcrManager::instance() +{ + if (!s_instance) { + 
s_instance = new OcrManager(qApp); + } + return s_instance; +} + +bool OcrManager::isAvailable() const +{ +#ifdef HAVE_TESSERACT_OCR + return m_initialized && m_tesseract != nullptr; +#else + return false; +#endif +} + +OcrManager::OcrStatus OcrManager::status() const +{ + return m_status; +} + +QMap OcrManager::availableLanguagesWithNames() const +{ + QMap result; + for (const QString &langCode : m_availableLanguages) { + result[langCode] = m_languageNames.value(langCode, langCode); + } + return result; +} + +void OcrManager::setLanguagesByCode(const QStringList &languageCodes) +{ +#ifdef HAVE_TESSERACT_OCR + if (languageCodes.isEmpty()) { + qCWarning(SPECTACLE_LOG) << "No OCR languages specified"; + return; + } + + if (validateAndApplyLanguages(languageCodes)) { + m_configuredLanguages = m_activeLanguages; + Settings::setOcrLanguages(m_activeLanguages); + Settings::self()->save(); + qCDebug(SPECTACLE_LOG) << "OCR languages successfully changed to:" << m_currentLanguageCode; + } else { + qCWarning(SPECTACLE_LOG) << "Failed to set OCR languages"; + } +#else + Q_UNUSED(languageCodes); + qCWarning(SPECTACLE_LOG) << "OCR not available - Tesseract not compiled in"; +#endif +} + +QString OcrManager::currentLanguageCode() const +{ + return m_currentLanguageCode; +} + +void OcrManager::recognizeText(const QImage &image) +{ +#ifdef HAVE_TESSERACT_OCR + if (!isAvailable()) { + qCWarning(SPECTACLE_LOG) << "Cannot start OCR: engine is not available"; + Q_EMIT textRecognized(QString(), false); + return; + } + + if (m_status == OcrStatus::Processing) { + qCWarning(SPECTACLE_LOG) << "Cannot start OCR: text extraction already running"; + Q_EMIT textRecognized(QString(), false); + return; + } + + if (image.isNull() || image.size().isEmpty()) { + qCWarning(SPECTACLE_LOG) << "Cannot start OCR: invalid image provided"; + Q_EMIT textRecognized(QString(), false); + return; + } + + // Ensure configured languages are active + if (m_configuredLanguages.isEmpty() || m_activeLanguages != 
m_configuredLanguages) { + if (!validateAndApplyLanguages(m_configuredLanguages)) { + qCWarning(SPECTACLE_LOG) << "Cannot start OCR: failed to activate configured languages"; + Q_EMIT textRecognized(QString(), false); + return; + } + } + + beginRecognition(image); +#else + Q_UNUSED(image); + qCWarning(SPECTACLE_LOG) << "Cannot start OCR: Spectacle built without Tesseract support"; + Q_EMIT textRecognized(QString(), false); +#endif +} + +void OcrManager::recognizeTextWithLanguage(const QImage &image, const QString &languageCode) +{ +#ifdef HAVE_TESSERACT_OCR + if (languageCode.isEmpty()) { + recognizeText(image); + return; + } + + if (!isAvailable()) { + qCWarning(SPECTACLE_LOG) << "Cannot start OCR with language" << languageCode << ": engine is not available"; + Q_EMIT textRecognized(QString(), false); + return; + } + + if (m_status == OcrStatus::Processing) { + qCWarning(SPECTACLE_LOG) << "Cannot start OCR with language" << languageCode << ": text extraction already running"; + Q_EMIT textRecognized(QString(), false); + return; + } + + if (image.isNull() || image.size().isEmpty()) { + qCWarning(SPECTACLE_LOG) << "Cannot start OCR with language" << languageCode << ": invalid image provided"; + Q_EMIT textRecognized(QString(), false); + return; + } + + const QStringList tempLanguages{languageCode}; + if (!validateAndApplyLanguages(tempLanguages)) { + qCWarning(SPECTACLE_LOG) << "Cannot start OCR with language" << languageCode << ": failed to activate language"; + Q_EMIT textRecognized(QString(), false); + return; + } + + // Store that we need to restore after recognition + m_shouldRestoreToConfigured = (m_activeLanguages != m_configuredLanguages); + + beginRecognition(image); +#else + Q_UNUSED(image); + Q_UNUSED(languageCode); + qCWarning(SPECTACLE_LOG) << "Cannot start OCR: Spectacle built without Tesseract support"; + Q_EMIT textRecognized(QString(), false); +#endif +} + +void OcrManager::handleRecognitionComplete(const QString &text, bool success) +{ + 
m_timeoutTimer->stop(); + + if (success) { + setStatus(OcrStatus::Ready); + + if (!text.isEmpty()) { + QApplication::clipboard()->setText(text); + } + + Q_EMIT textRecognized(text, true); + qCDebug(SPECTACLE_LOG) << "OCR recognition completed successfully"; + } else { + setStatus(OcrStatus::Error); + Q_EMIT textRecognized(QString(), false); + qCWarning(SPECTACLE_LOG) << "OCR recognition failed"; + } + + // Restore configured languages if we used temporary ones + if (m_shouldRestoreToConfigured && !m_configuredLanguages.isEmpty()) { + validateAndApplyLanguages(m_configuredLanguages); + m_shouldRestoreToConfigured = false; + } +} + +bool OcrManager::validateAndApplyLanguages(const QStringList &languageCodes) +{ +#ifdef HAVE_TESSERACT_OCR + if (languageCodes.isEmpty()) { + qCWarning(SPECTACLE_LOG) << "No OCR languages provided"; + return false; + } + + QStringList validLanguages; + for (const QString &lang : languageCodes) { + if (lang == u"osd"_s) { + qCDebug(SPECTACLE_LOG) << "Skipping 'osd' language"; + continue; + } + + if (!isLanguageAvailable(lang)) { + qCWarning(SPECTACLE_LOG) << "OCR language not available:" << lang; + continue; + } + + if (!validLanguages.contains(lang)) { + validLanguages.append(lang); + } + } + + if (validLanguages.isEmpty()) { + qCWarning(SPECTACLE_LOG) << "No valid OCR languages after filtering"; + return false; + } + + if (validLanguages.size() > MAX_OCR_LANGUAGES) { + validLanguages = validLanguages.mid(0, MAX_OCR_LANGUAGES); + qCInfo(SPECTACLE_LOG) << "Limited to" << MAX_OCR_LANGUAGES << "languages:" << validLanguages; + } + + const QString combinedLanguages = validLanguages.join(u"+"_s); + + if (m_currentLanguageCode == combinedLanguages && !m_activeLanguages.isEmpty()) { + qCDebug(SPECTACLE_LOG) << "Languages already active, no change needed"; + return true; + } + + if (!setupTesseractLanguages(validLanguages)) { + qCWarning(SPECTACLE_LOG) << "Failed to apply OCR languages:" << combinedLanguages; + return false; + } + + 
m_activeLanguages = validLanguages; + m_currentLanguageCode = combinedLanguages; + + qCDebug(SPECTACLE_LOG) << "OCR languages applied:" << combinedLanguages; + return true; +#else + Q_UNUSED(languageCodes); + return false; +#endif +} + +void OcrManager::beginRecognition(const QImage &image) +{ +#ifdef HAVE_TESSERACT_OCR + setStatus(OcrStatus::Processing); + m_timeoutTimer->start(); + + QMetaObject::invokeMethod( + m_worker, + [worker = m_worker, image, tesseract = m_tesseract]() { + worker->processImage(image, tesseract); + }, + Qt::QueuedConnection); +#else + Q_UNUSED(image); +#endif +} + +void OcrManager::initializeTesseract() +{ +#ifdef HAVE_TESSERACT_OCR + try { + m_tesseract = new tesseract::TessBaseAPI(); + + if (m_tesseract->Init(nullptr, nullptr) != 0) { + qCWarning(SPECTACLE_LOG) << "Failed to initialize Tesseract OCR engine with auto-detection"; + setStatus(OcrStatus::Error); + delete m_tesseract; + m_tesseract = nullptr; + return; + } + + const char *datapath = m_tesseract->GetDatapath(); + QString tessdataPath = datapath ? 
QString::fromUtf8(datapath) : QString(); + qCDebug(SPECTACLE_LOG) << "Using tessdata path: " << tessdataPath; + + setupAvailableLanguages(tessdataPath); + + if (m_availableLanguages.isEmpty()) { + qCWarning(SPECTACLE_LOG) << "No language data files found in tessdata directory"; + setStatus(OcrStatus::Error); + delete m_tesseract; + m_tesseract = nullptr; + return; + } + + m_tesseract->End(); + + QStringList configLanguages = Settings::ocrLanguages(); + QStringList initLanguages; + + // Use configured languages if valid, otherwise fallback to first available + for (const QString &lang : configLanguages) { + if (!lang.isEmpty() && m_availableLanguages.contains(lang) && lang != u"osd"_s) { + initLanguages.append(lang); + } + } + + if (initLanguages.isEmpty()) { + auto it = std::find_if(m_availableLanguages.begin(), m_availableLanguages.end(), [](const QString &lang) { + return lang != u"osd"_s; + }); + + if (it != m_availableLanguages.end()) { + initLanguages.append(*it); + } else { + qCCritical(SPECTACLE_LOG) << "No fallback language available (only osd present)"; + setStatus(OcrStatus::Error); + delete m_tesseract; + m_tesseract = nullptr; + return; + } + } + + const QString combinedInitLanguages = initLanguages.join(u"+"_s); + qCDebug(SPECTACLE_LOG) << "Initializing Tesseract with languages:" << combinedInitLanguages; + + if (m_tesseract->Init(nullptr, combinedInitLanguages.toUtf8().constData()) != 0) { + qCWarning(SPECTACLE_LOG) << "Failed to initialize Tesseract with languages:" << combinedInitLanguages; + setStatus(OcrStatus::Error); + delete m_tesseract; + m_tesseract = nullptr; + return; + } + + m_currentLanguageCode = combinedInitLanguages; + m_tesseract->SetPageSegMode(tesseract::PSM_AUTO); + + m_initialized = true; + setStatus(OcrStatus::Ready); + qCDebug(SPECTACLE_LOG) << "Tesseract OCR engine initialized successfully with languages:" << combinedInitLanguages; + + loadSavedLanguageSetting(); + } catch (const std::exception &e) { + qCWarning(SPECTACLE_LOG) 
<< "Exception during Tesseract initialization:" << e.what(); + setStatus(OcrStatus::Error); + if (m_tesseract) { + delete m_tesseract; + m_tesseract = nullptr; + } + } +#else + qCDebug(SPECTACLE_LOG) << "Tesseract OCR not available - compiled out"; + setStatus(OcrStatus::Error); +#endif +} + +void OcrManager::loadSavedLanguageSetting() +{ + if (!isAvailable()) { + qCDebug(SPECTACLE_LOG) << "OCR not available, skipping language loading"; + return; + } + + QStringList savedLanguages = Settings::ocrLanguages(); + qCDebug(SPECTACLE_LOG) << "Loaded OCR languages setting from config:" << savedLanguages; + qCDebug(SPECTACLE_LOG) << "Current OCR language code:" << m_currentLanguageCode; + qCDebug(SPECTACLE_LOG) << "Available languages:" << m_availableLanguages; + + QStringList validLanguages; + for (const QString &lang : savedLanguages) { + if (lang != u"osd"_s && isLanguageAvailable(lang)) { + validLanguages.append(lang); + } + } + + if (validLanguages.isEmpty()) { + // Find first valid language as fallback + auto it = std::find_if(m_availableLanguages.begin(), m_availableLanguages.end(), [](const QString &lang) { + return lang != u"osd"_s; + }); + if (it != m_availableLanguages.end()) { + validLanguages.append(*it); + } else { + qCWarning(SPECTACLE_LOG) << "No usable languages available (only osd present), cannot set default"; + return; + } + qCDebug(SPECTACLE_LOG) << "No valid saved languages, using default:" << validLanguages; + Settings::setOcrLanguages(validLanguages); + Settings::self()->save(); + } + + m_configuredLanguages = validLanguages; + + const QString combinedLanguages = validLanguages.join(u"+"_s); + if (combinedLanguages != m_currentLanguageCode) { + qCDebug(SPECTACLE_LOG) << "Loading OCR languages setting:" << validLanguages; + validateAndApplyLanguages(validLanguages); + } else { + qCDebug(SPECTACLE_LOG) << "OCR languages already set to:" << combinedLanguages; + m_activeLanguages = validLanguages; + } +} + +void OcrManager::setStatus(OcrStatus status) 
+{ + if (m_status == status) { + return; + } + + m_status = status; + Q_EMIT statusChanged(status); +} + +bool OcrManager::isLanguageAvailable(const QString &languageCode) const +{ + return m_availableLanguages.contains(languageCode); +} + +bool OcrManager::setupTesseractLanguages(const QStringList &langCodes) +{ +#ifdef HAVE_TESSERACT_OCR + if (!m_tesseract || langCodes.isEmpty()) { + return false; + } + + const char *datapath = m_tesseract->GetDatapath(); + QString tessdataPath = datapath ? QString::fromUtf8(datapath) : QString(); + + if (tessdataPath.isEmpty()) { + qCWarning(SPECTACLE_LOG) << "Tessdata path not found"; + return false; + } + + for (const QString &langCode : langCodes) { + const QString langFile = QDir(tessdataPath).filePath(langCode + u".traineddata"_s); + if (!QFile::exists(langFile)) { + qCWarning(SPECTACLE_LOG) << "Language file not found:" << langFile; + return false; + } + } + + try { + m_tesseract->End(); + + const QString combinedLangs = langCodes.join(u"+"_s); + + if (m_tesseract->Init(nullptr, combinedLangs.toUtf8().constData()) != 0) { + qCWarning(SPECTACLE_LOG) << "Failed to initialize Tesseract with languages:" << combinedLangs; + + // Fallback to first available language + QString fallbackLang; + if (!m_availableLanguages.isEmpty()) { + auto it = std::find_if(m_availableLanguages.begin(), m_availableLanguages.end(), [](const QString &lang) { + return lang != u"osd"_s; + }); + if (it != m_availableLanguages.end()) { + fallbackLang = *it; + } + } + + if (!fallbackLang.isEmpty() && m_tesseract->Init(nullptr, fallbackLang.toUtf8().constData()) != 0) { + qCCritical(SPECTACLE_LOG) << "Failed to fallback to language:" << fallbackLang; + return false; + } + return false; + } + + m_tesseract->SetPageSegMode(tesseract::PSM_AUTO); + return true; + } catch (const std::exception &e) { + qCWarning(SPECTACLE_LOG) << "Exception while setting up Tesseract languages:" << e.what(); + return false; + } +#else + Q_UNUSED(langCodes); + return false; 
+#endif +} + +void OcrManager::setupAvailableLanguages(const QString &tessdataPath) +{ +#ifdef HAVE_TESSERACT_OCR + m_availableLanguages.clear(); + m_languageNames.clear(); + + if (!m_tesseract) { + qCWarning(SPECTACLE_LOG) << "Cannot enumerate OCR languages: Tesseract not initialized"; + return; + } + + QStringList detectedLanguages; + + try { + std::vector available; + m_tesseract->GetAvailableLanguagesAsVector(&available); + detectedLanguages.reserve(static_cast(available.size())); + + for (const std::string &language : available) { + const QString langCode = QString::fromStdString(language); + if (langCode.isEmpty()) { + continue; + } + + if (!tessdataPath.isEmpty()) { + const QString trainedDataPath = QDir(tessdataPath).filePath(langCode + u".traineddata"_s); + if (!QFile::exists(trainedDataPath)) { + qCDebug(SPECTACLE_LOG) << "Skipping OCR language" << langCode << "- missing traineddata at" << trainedDataPath; + continue; + } + } + + if (!detectedLanguages.contains(langCode)) { + detectedLanguages.append(langCode); + } + } + } catch (const std::exception &e) { + qCWarning(SPECTACLE_LOG) << "Exception while enumerating Tesseract languages:" << e.what(); + } + + std::sort(detectedLanguages.begin(), detectedLanguages.end()); + m_availableLanguages = detectedLanguages; + + for (const QString &langCode : std::as_const(m_availableLanguages)) { + if (langCode == u"osd"_s) { + m_languageNames.insert(langCode, i18nc("@item:inlistbox", "Orientation and Script Detection")); + continue; + } + + const QString displayName = tesseractLangName(langCode); + m_languageNames.insert(langCode, displayName); + } + + qCDebug(SPECTACLE_LOG) << "Detected OCR languages:" << m_availableLanguages; +#else + Q_UNUSED(tessdataPath); +#endif +} + +QString OcrManager::tesseractLangName(const QString &tesseractCode) const +{ + static const QMap tesseractToIsoMap = { + {u"afr"_s, u"af"_s}, {u"ara"_s, u"ar"_s}, {u"aze"_s, u"az"_s}, {u"aze_cyrl"_s, u"az"_s}, {u"bel"_s, u"be"_s}, + {u"ben"_s, 
u"bn"_s}, {u"bul"_s, u"bg"_s}, {u"cat"_s, u"ca"_s}, {u"ces"_s, u"cs"_s}, {u"chi_sim"_s, u"zh_CN"_s}, + {u"chi_tra"_s, u"zh_TW"_s}, {u"cym"_s, u"cy"_s}, {u"dan"_s, u"da"_s}, {u"dan_frak"_s, u"da"_s}, {u"deu"_s, u"de"_s}, + {u"deu_frak"_s, u"de"_s}, {u"deu_latf"_s, u"de"_s}, {u"ell"_s, u"el"_s}, {u"eng"_s, u"en"_s}, {u"epo"_s, u"eo"_s}, + {u"est"_s, u"et"_s}, {u"eus"_s, u"eu"_s}, {u"fas"_s, u"fa"_s}, {u"fin"_s, u"fi"_s}, {u"fra"_s, u"fr"_s}, + {u"frk"_s, u"de"_s}, {u"gla"_s, u"gd"_s}, {u"gle"_s, u"ga"_s}, {u"glg"_s, u"gl"_s}, {u"heb"_s, u"he"_s}, + {u"hin"_s, u"hi"_s}, {u"hrv"_s, u"hr"_s}, {u"hun"_s, u"hu"_s}, {u"ind"_s, u"id"_s}, {u"isl"_s, u"is"_s}, + {u"ita"_s, u"it"_s}, {u"ita_old"_s, u"it"_s}, {u"jpn"_s, u"ja"_s}, {u"kor"_s, u"ko"_s}, {u"kor_vert"_s, u"ko"_s}, + {u"lav"_s, u"lv"_s}, {u"lit"_s, u"lt"_s}, {u"nld"_s, u"nl"_s}, {u"nor"_s, u"no"_s}, {u"pol"_s, u"pl"_s}, + {u"por"_s, u"pt"_s}, {u"ron"_s, u"ro"_s}, {u"rus"_s, u"ru"_s}, {u"slk"_s, u"sk"_s}, {u"slk_frak"_s, u"sk"_s}, + {u"slv"_s, u"sl"_s}, {u"spa"_s, u"es"_s}, {u"spa_old"_s, u"es"_s}, {u"srp"_s, u"sr"_s}, {u"srp_latn"_s, u"sr"_s}, + {u"swe"_s, u"sv"_s}, {u"tur"_s, u"tr"_s}, {u"ukr"_s, u"uk"_s}, {u"vie"_s, u"vi"_s}, {u"amh"_s, u"am"_s}, + {u"asm"_s, u"as"_s}, {u"bod"_s, u"bo"_s}, {u"dzo"_s, u"dz"_s}, {u"guj"_s, u"gu"_s}, {u"kan"_s, u"kn"_s}, + {u"kat"_s, u"ka"_s}, {u"kat_old"_s, u"ka"_s}, {u"kaz"_s, u"kk"_s}, {u"khm"_s, u"km"_s}, {u"kir"_s, u"ky"_s}, + {u"lao"_s, u"lo"_s}, {u"mal"_s, u"ml"_s}, {u"mar"_s, u"mr"_s}, {u"mya"_s, u"my"_s}, {u"nep"_s, u"ne"_s}, + {u"ori"_s, u"or"_s}, {u"pan"_s, u"pa"_s}, {u"sin"_s, u"si"_s}, {u"tam"_s, u"ta"_s}, {u"tel"_s, u"te"_s}, + {u"tha"_s, u"th"_s}, {u"urd"_s, u"ur"_s}, {u"bos"_s, u"bs"_s}, {u"bre"_s, u"br"_s}, {u"cos"_s, u"co"_s}, + {u"fao"_s, u"fo"_s}, {u"fil"_s, u"tl"_s}, {u"fry"_s, u"fy"_s}, {u"hat"_s, u"ht"_s}, {u"hye"_s, u"hy"_s}, + {u"iku"_s, u"iu"_s}, {u"jav"_s, u"jv"_s}, {u"kmr"_s, u"ku"_s}, {u"kur"_s, u"ku"_s}, {u"lat"_s, u"la"_s}, + {u"ltz"_s, u"lb"_s}, 
{u"mkd"_s, u"mk"_s}, {u"mlt"_s, u"mt"_s}, {u"mon"_s, u"mn"_s}, {u"mri"_s, u"mi"_s}, + {u"msa"_s, u"ms"_s}, {u"oci"_s, u"oc"_s}, {u"pus"_s, u"ps"_s}, {u"que"_s, u"qu"_s}, {u"san"_s, u"sa"_s}, + {u"snd"_s, u"sd"_s}, {u"sqi"_s, u"sq"_s}, {u"sun"_s, u"su"_s}, {u"swa"_s, u"sw"_s}, {u"tat"_s, u"tt"_s}, + {u"tgk"_s, u"tg"_s}, {u"tgl"_s, u"tl"_s}, {u"tir"_s, u"ti"_s}, {u"ton"_s, u"to"_s}, {u"uig"_s, u"ug"_s}, + {u"uzb"_s, u"uz"_s}, {u"uzb_cyrl"_s, u"uz"_s}, {u"yid"_s, u"yi"_s}, {u"yor"_s, u"yo"_s}, + }; + + if (tesseractCode == u"equ"_s) { + return i18n("Math/Equation Detection"); + } + if (tesseractCode == u"osd"_s) { + return i18n("Orientation and Script Detection"); + } + + const QString isoCode = tesseractToIsoMap.value(tesseractCode); + if (!isoCode.isEmpty()) { + QLocale locale(isoCode); + QString name = locale.nativeLanguageName(); + + if (!name.isEmpty()) { + name[0] = name[0].toUpper(); + return name; + } + + QString languageName = QLocale::languageToString(locale.language()); + if (!languageName.isEmpty()) { + languageName[0] = languageName[0].toUpper(); + return languageName; + } + } + + return tesseractCode; +} + +OcrWorker::OcrWorker(QObject *parent) + : QObject(parent) +{ +} + +void OcrWorker::processImage(const QImage &image, tesseract::TessBaseAPI *tesseract) +{ +#ifdef HAVE_TESSERACT_OCR + QMutexLocker locker(&m_mutex); + + if (!tesseract || image.isNull()) { + Q_EMIT imageProcessed(QString(), false); + return; + } + + try { + QImage rgbImage = image.convertToFormat(QImage::Format_RGB888); + + tesseract->SetImage(rgbImage.bits(), rgbImage.width(), rgbImage.height(), 3, rgbImage.bytesPerLine()); + + if (tesseract->Recognize(0) != 0) { + Q_EMIT imageProcessed(QString(), false); + return; + } + + QStringList lines; + std::unique_ptr iterator(tesseract->GetIterator()); + + if (iterator) { + do { + const char *lineText = iterator->GetUTF8Text(tesseract::RIL_TEXTLINE); + if (lineText != nullptr) { + QString line = QString::fromUtf8(lineText).trimmed(); + if 
(!line.isEmpty()) { + lines.append(line); + } + delete[] lineText; + } + } while (iterator->Next(tesseract::RIL_TEXTLINE)); + } + + const QString result = lines.join(QLatin1Char('\n')).trimmed(); + Q_EMIT imageProcessed(result, true); + } catch (const std::exception &e) { + qCWarning(SPECTACLE_LOG) << "Exception in OCR worker:" << e.what(); + Q_EMIT imageProcessed(QString(), false); + } +#else + Q_UNUSED(image); + Q_UNUSED(tesseract); + Q_EMIT imageProcessed(QString(), false); +#endif +} diff --git a/src/OcrManager.h b/src/OcrManager.h new file mode 100644 index 000000000..c71505b3e --- /dev/null +++ b/src/OcrManager.h @@ -0,0 +1,175 @@ +/* This file is part of Spectacle, the KDE screenshot utility + * SPDX-FileCopyrightText: 2025 Jhair Paris + * SPDX-License-Identifier: LGPL-2.0-or-later + */ + +#pragma once + +#include "Config.h" + +#ifdef HAVE_TESSERACT_OCR +#include +#else +namespace tesseract +{ +class TessBaseAPI; +} +#endif + +#include +#include +#include +#include +#include +#include +#include + +#include + +/** + * @brief Worker class for OCR processing in background thread + */ +class OcrWorker : public QObject +{ + Q_OBJECT + +public: + explicit OcrWorker(QObject *parent = nullptr); + +public Q_SLOTS: + void processImage(const QImage &image, tesseract::TessBaseAPI *tesseract); + +Q_SIGNALS: + void imageProcessed(const QString &text, bool success); + +private: + QMutex m_mutex; +}; + +/** + * This class uses Tesseract OCR engine to extract text from images. + * It provides both synchronous and asynchronous text recognition capabilities. 
+ */ +class OcrManager : public QObject +{ + Q_OBJECT + +public: + static constexpr int MAX_OCR_LANGUAGES = 4; + static constexpr int MIN_OCR_LANGUAGES = 1; + enum class OcrStatus { + Ready = 0, + Processing = 1, + Error = 2 + }; + Q_ENUM(OcrStatus) + + explicit OcrManager(QObject *parent = nullptr); + ~OcrManager() override; + + static OcrManager *instance(); + + /** + * @brief Check if OCR engine is available and properly initialized + * @return true if OCR is available, false otherwise + */ + bool isAvailable() const; + + /** + * @brief Get the current OCR processing status + * @return Current status of the OCR engine + */ + OcrStatus status() const; + + /** + * @brief Get a map of available languages with human-readable names + * @return QMap where key is language code and value is display name + */ + QMap availableLanguagesWithNames() const; + + /** + * @brief Set multiple languages for OCR processing + * @param languageCodes List of language codes to use (e.g., ["eng", "spa", "fra"]) + */ + void setLanguagesByCode(const QStringList &languageCodes); + + /** + * @brief Get the current language code + * @return Current language code (e.g., "eng", "spa") + */ + QString currentLanguageCode() const; + +public Q_SLOTS: + /** + * @brief Extract text from an image asynchronously + * @param image The image to process + * + * This method processes the image in a background thread and emits + * textRecognized() signal when complete. + */ + void recognizeText(const QImage &image); + + /** + * @brief Extract text from an image using a temporary language selection + * @param image The image to process + * @param languageCode The one-off language code to use (e.g. "eng") + * + * The provided language is applied only for this recognition request and + * does not persist the user's saved configuration. 
+ */ + void recognizeTextWithLanguage(const QImage &image, const QString &languageCode); + +Q_SIGNALS: + /** + * @brief Emitted when text recognition is complete + * @param text The recognized text + * @param success true if recognition was successful + */ + void textRecognized(const QString &text, bool success); + + /** + * @brief Emitted when OCR status changes + * @param status New status + */ + void statusChanged(OcrStatus status); + +private Q_SLOTS: + void handleRecognitionComplete(const QString &text, bool success); + +private: + void initializeTesseract(); + void setStatus(OcrStatus status); + bool setupTesseractLanguages(const QStringList &langCodes); + void setupAvailableLanguages(const QString &tessdataPath); + void loadSavedLanguageSetting(); + bool isLanguageAvailable(const QString &languageCode) const; + QString tesseractLangName(const QString &tesseractCode) const; + + /** + * @brief Validate, filter, and apply languages to Tesseract + * @param languageCodes Languages to validate and apply + * @return true if languages were successfully applied + */ + bool validateAndApplyLanguages(const QStringList &languageCodes); + void beginRecognition(const QImage &image); + + static OcrManager *s_instance; + +#ifdef HAVE_TESSERACT_OCR + tesseract::TessBaseAPI *m_tesseract; + OcrWorker *m_worker; +#endif + std::unique_ptr m_workerThread; + QTimer *m_timeoutTimer; + + OcrStatus m_status; + QString m_currentLanguageCode; + QStringList m_configuredLanguages; + QStringList m_activeLanguages; + bool m_shouldRestoreToConfigured; + QStringList m_availableLanguages; + QMap m_languageNames; + bool m_initialized; + +private: +}; \ No newline at end of file diff --git a/src/SpectacleCore.cpp b/src/SpectacleCore.cpp index 7371ce768..caada874e 100644 --- a/src/SpectacleCore.cpp +++ b/src/SpectacleCore.cpp @@ -1,6 +1,7 @@ /* * SPDX-FileCopyrightText: 2019 David Redondo * SPDX-FileCopyrightText: 2015 Boudhayan Gupta + * SPDX-FileCopyrightText: 2025 Jhair Paris * * 
SPDX-License-Identifier: LGPL-2.0-or-later */ @@ -20,6 +21,7 @@ #include "Gui/HelpMenu.h" #include "Gui/OptionsMenu.h" #include "Gui/InlineMessageModel.h" +#include "OcrManager.h" #include "Platforms/ImagePlatformXcb.h" #include "Platforms/VideoPlatform.h" #include "ShortcutActions.h" @@ -49,6 +51,7 @@ #include #include #include +#include #include #include #include @@ -59,6 +62,8 @@ #include #include #include +#include +#include #include #include #include @@ -538,6 +543,63 @@ SpectacleCore::SpectacleCore(QObject *parent) InlineMessageModel::instance()->push(InlineMessageModel::Scanned, text, result); }; connect(exportManager, &ExportManager::qrCodeScanned, this, onQRCodeScanned); + + auto onOcrTextRecognized = [this](const QString &text, bool success) { + if (!success) { + InlineMessageModel::instance()->push(InlineMessageModel::Error, + i18nc("@info", "Text extraction failed")); + return; + } + + if (text.isEmpty()) { + InlineMessageModel::instance()->push(InlineMessageModel::Copied, + i18nc("@info", "No text found in the image")); + return; + } + + InlineMessageModel::instance()->push(InlineMessageModel::Copied, + i18nc("@info", "Text extraction completed")); + + auto notification = new KNotification(u"ocrTextExtracted"_s, KNotification::CloseOnTimeout, this); + notification->setTitle(i18nc("@info:notification title", "Text Extracted")); + + notification->setText(i18nc("@info:notification", "Text copied to clipboard")); + notification->setIconName(u"document-scan"_s); + + if (!text.isEmpty()) { + auto openEditorAction = notification->addAction(i18nc("@action:button", "Open in Text Editor")); + connect(openEditorAction, &KNotificationAction::activated, this, [text]() { + // Create temporary file with extracted text + auto exportManager = ExportManager::instance(); + exportManager->updateTimestamp(); + auto timestamp = exportManager->timestamp(); + + QString filename = 
QStringLiteral("spectacle_ocr_%1.txt").arg(timestamp.toString(QStringLiteral("yyyyMMdd_HHmmss"))); + QString templatePath = QDir::tempPath() + QStringLiteral("/") + filename; + + QTemporaryFile tempFile; + tempFile.setFileTemplate(templatePath); + tempFile.setAutoRemove(false); + + if (tempFile.open()) { + QTextStream stream(&tempFile); + stream << text; + tempFile.close(); + + auto job = new KIO::OpenUrlJob(QUrl::fromLocalFile(tempFile.fileName())); + job->start(); + } + }); + } + + notification->sendEvent(); + }; + + // Connect to OCR manager + connect(OcrManager::instance(), &OcrManager::textRecognized, this, onOcrTextRecognized); + connect(OcrManager::instance(), &OcrManager::statusChanged, this, [this](OcrManager::OcrStatus) { + Q_EMIT ocrStatusChanged(); + }); connect(exportManager, &ExportManager::errorMessage, this, &SpectacleCore::showErrorMessage); @@ -582,6 +644,87 @@ SpectacleCore::SpectacleCore(QObject *parent) }); } +bool SpectacleCore::ocrAvailable() const +{ + return OcrManager::instance()->isAvailable(); +} + +OcrManager::OcrStatus SpectacleCore::ocrStatus() const +{ + return OcrManager::instance()->status(); +} + +QVariantMap SpectacleCore::ocrAvailableLanguages() const +{ + auto ocrManager = OcrManager::instance(); + if (!ocrManager->isAvailable()) { + return QVariantMap(); + } + + auto languageMap = ocrManager->availableLanguagesWithNames(); + QVariantMap result; + for (auto it = languageMap.constBegin(); it != languageMap.constEnd(); ++it) { + result[it.key()] = it.value(); + } + return result; +} + +bool SpectacleCore::startOcrExtraction(const QString &languageCode) +{ + if (m_videoMode) { + return false; + } + + const bool hasCaptureWindows = !CaptureWindow::instances().isEmpty(); + + if (hasCaptureWindows) { + auto selectionEditor = SelectionEditor::instance(); + auto inlineMessages = InlineMessageModel::instance(); + + if (!selectionEditor->acceptSelection(ExportManager::UserAction)) { + inlineMessages->push(InlineMessageModel::Error, 
i18nc("@info", "Please select a region before extracting text")); + return false; + } + + QMetaObject::invokeMethod( + this, + [this, languageCode]() { + performOcrExtraction(languageCode); + }, + Qt::QueuedConnection); + return true; + } + + return performOcrExtraction(languageCode); +} + +bool SpectacleCore::performOcrExtraction(const QString &languageCode) +{ + auto ocrManager = OcrManager::instance(); + auto inlineMessages = InlineMessageModel::instance(); + + if (!ocrManager->isAvailable()) { + inlineMessages->push(InlineMessageModel::Error, i18nc("@info", "OCR is not available.")); + return false; + } + + const QImage image = m_annotationDocument->renderToImage(); + if (image.isNull()) { + inlineMessages->push(InlineMessageModel::Error, i18nc("@info", "No screenshot available.")); + return false; + } + + inlineMessages->push(InlineMessageModel::Copied, i18nc("@info", "Extracting text from image...")); + + if (languageCode.isEmpty()) { + ocrManager->recognizeText(image); + } else { + ocrManager->recognizeTextWithLanguage(image, languageCode); + } + + return true; +} + SpectacleCore::~SpectacleCore() noexcept { s_self = nullptr; diff --git a/src/SpectacleCore.h b/src/SpectacleCore.h index 23d65ead9..2c87ff8f4 100644 --- a/src/SpectacleCore.h +++ b/src/SpectacleCore.h @@ -17,6 +17,7 @@ #include "Gui/Annotations/AnnotationDocument.h" #include "Gui/CaptureWindow.h" #include "Gui/ViewerWindow.h" +#include "OcrManager.h" #include "Platforms/PlatformLoader.h" #include "RecordingModeModel.h" #include "VideoFormatModel.h" @@ -40,6 +41,8 @@ class SpectacleCore : public QObject Q_PROPERTY(bool videoMode READ videoMode WRITE setVideoMode NOTIFY videoModeChanged) Q_PROPERTY(QUrl currentVideo READ currentVideo NOTIFY currentVideoChanged) Q_PROPERTY(AnnotationDocument *annotationDocument READ annotationDocument CONSTANT FINAL) + Q_PROPERTY(bool ocrAvailable READ ocrAvailable NOTIFY ocrStatusChanged FINAL) + Q_PROPERTY(OcrManager::OcrStatus ocrStatus READ ocrStatus NOTIFY 
ocrStatusChanged FINAL) public: enum class StartMode { @@ -74,6 +77,10 @@ public: QUrl currentVideo() const; + bool ocrAvailable() const; + OcrManager::OcrStatus ocrStatus() const; + Q_INVOKABLE QVariantMap ocrAvailableLanguages() const; + Q_INVOKABLE bool startOcrExtraction(const QString &languageCode = QString()); void initGuiNoScreenshot(); @@ -125,6 +132,7 @@ Q_SIGNALS: void videoModeChanged(bool videoMode); void currentVideoChanged(const QUrl &currentVideo); void recordedTimeChanged(); + void ocrStatusChanged(); private: explicit SpectacleCore(QObject *parent = nullptr); @@ -148,6 +156,7 @@ private: void unityLauncherUpdate(const QVariantMap &properties) const; void setCurrentVideo(const QUrl &currentVideo); QUrl videoOutputUrl() const; + bool performOcrExtraction(const QString &languageCode); static SpectacleCore *s_self; std::unique_ptr m_annotationDocument = nullptr; -- GitLab From a1f7ac0b716ea295cfec120bf8691dd86e56413b Mon Sep 17 00:00:00 2001 From: Jhair Paris Date: Mon, 13 Oct 2025 22:58:17 -0500 Subject: [PATCH 3/3] add support for multiple OCR languages in preferences dialog - Switch from single ocrLanguage string to ocrLanguages string list in settings - Add OcrLanguageSelector widget for multi-language selection - Integrate new selector into GeneralOptionsPage and SettingsDialog add OCR language menu to main interface - Introduce OcrLanguageMenu and OcrLanguageMenuButton components - Expose language selection in ViewerPage and CaptureOverlay - Move OCR extraction logic to SpectacleCore::startOcrExtraction Remove OCR language menu components and references from the project Add OCR language submenu to ExportMenu --- src/CMakeLists.txt | 1 + src/Gui/CaptureOverlay.qml | 8 +- src/Gui/ExportMenu.cpp | 84 ++++++ src/Gui/ExportMenu.h | 5 + src/Gui/OcrAction.qml | 7 +- src/Gui/SettingsDialog/GeneralOptions.ui | 38 ++- src/Gui/SettingsDialog/GeneralOptionsPage.cpp | 81 ++---- src/Gui/SettingsDialog/GeneralOptionsPage.h | 16 +- .../SettingsDialog/OcrLanguageSelector.cpp 
| 271 ++++++++++++++++++ src/Gui/SettingsDialog/OcrLanguageSelector.h | 111 +++++++ src/Gui/SettingsDialog/SettingsDialog.cpp | 17 +- src/Gui/SettingsDialog/spectacle.kcfg | 4 +- src/Gui/ViewerPage.qml | 4 +- 13 files changed, 561 insertions(+), 86 deletions(-) create mode 100644 src/Gui/SettingsDialog/OcrLanguageSelector.cpp create mode 100644 src/Gui/SettingsDialog/OcrLanguageSelector.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c57535e34..6efeff637 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -44,6 +44,7 @@ target_sources(spectacle PRIVATE Gui/SelectionEditor.cpp Gui/SettingsDialog/GeneralOptionsPage.cpp Gui/SettingsDialog/ImageSaveOptionsPage.cpp + Gui/SettingsDialog/OcrLanguageSelector.cpp Gui/SettingsDialog/SettingsDialog.cpp Gui/SettingsDialog/ShortcutsOptionsPage.cpp Gui/SettingsDialog/VideoFormatComboBox.cpp diff --git a/src/Gui/CaptureOverlay.qml b/src/Gui/CaptureOverlay.qml index 37f3dcf85..431d76479 100644 --- a/src/Gui/CaptureOverlay.qml +++ b/src/Gui/CaptureOverlay.qml @@ -506,11 +506,13 @@ MouseArea { visible: action.enabled action: CopyImageAction {} } + ToolButton { display: TtToolButton.IconOnly - visible: action.enabled && !SpectacleCore.videoMode && SpectacleCore.ocrAvailable + visible: !SpectacleCore.videoMode && SpectacleCore.ocrAvailable action: OcrAction {} } + ExportMenuButton { focusPolicy: Qt.NoFocus } @@ -537,10 +539,12 @@ MouseArea { visible: action.enabled action: CopyImageAction {} } + ToolButton { - visible: action.enabled && !SpectacleCore.videoMode && SpectacleCore.ocrAvailable + visible: !SpectacleCore.videoMode && SpectacleCore.ocrAvailable action: OcrAction {} } + ExportMenuButton { focusPolicy: Qt.NoFocus } diff --git a/src/Gui/ExportMenu.cpp b/src/Gui/ExportMenu.cpp index e2c7dd5b6..57e1480fa 100644 --- a/src/Gui/ExportMenu.cpp +++ b/src/Gui/ExportMenu.cpp @@ -6,6 +6,7 @@ #include "ExportMenu.h" #include "CaptureWindow.h" +#include "OcrManager.h" #include "SpectacleCore.h" #include 
"WidgetWindowUtils.h" #include "settings.h" @@ -54,6 +55,8 @@ ExportMenu::ExportMenu(QWidget *parent) this, &ExportMenu::openScreenshotsFolder); addAction(KStandardActions::print(this, &ExportMenu::openPrintDialog, this)); + createOcrLanguageSubmenu(); + #ifdef PURPOSE_FOUND loadPurposeMenu(); connect(ExportManager::instance(), &ExportManager::imageChanged, this, &ExportMenu::onImageChanged); @@ -233,4 +236,85 @@ void ExportMenu::openPrintDialog() dialog->setVisible(true); } +void ExportMenu::createOcrLanguageSubmenu() +{ + Q_ASSERT(!m_ocrLanguageMenu); + + auto ocrManager = OcrManager::instance(); + + if (!ocrManager || !ocrManager->isAvailable()) { + return; + } + + m_ocrLanguageMenu = addMenu(i18nc("@action:menu", "Extract Text by Language")); + m_ocrLanguageMenu->setIcon(QIcon::fromTheme(u"document-scan"_s)); + + // Keep the submenu in sync with OCR status changes + if (ocrManager) { + connect(ocrManager, &OcrManager::statusChanged, this, &ExportMenu::buildOcrLanguageSubmenu); + } + + if (auto settings = Settings::self()) { + connect(settings, &Settings::ocrLanguagesChanged, this, &ExportMenu::buildOcrLanguageSubmenu); + } + + connect(m_ocrLanguageMenu, &QMenu::aboutToShow, this, &ExportMenu::buildOcrLanguageSubmenu); + + buildOcrLanguageSubmenu(); +} + +void ExportMenu::buildOcrLanguageSubmenu() +{ + if (!m_ocrLanguageMenu) { + return; + } + + m_ocrLanguageMenu->clear(); + + auto ocrManager = OcrManager::instance(); + + if (!ocrManager) { + QAction *action = m_ocrLanguageMenu->addAction(i18n("OCR engine is not available.")); + action->setEnabled(false); + return; + } + + const bool initializationFailed = ocrManager->status() == OcrManager::OcrStatus::Error; + if (!ocrManager->isAvailable()) { + QAction *action = m_ocrLanguageMenu->addAction(initializationFailed ? i18n("OCR is not available. 
Please install Tesseract OCR.") + : i18n("OCR engine is initializing…")); + action->setEnabled(false); + return; + } + + const bool busy = ocrManager->status() == OcrManager::OcrStatus::Processing; + const QMap languages = ocrManager->availableLanguagesWithNames(); + + if (languages.isEmpty()) { + QAction *action = m_ocrLanguageMenu->addAction(i18n("No OCR language data available.")); + action->setEnabled(false); + return; + } + + for (auto it = languages.cbegin(); it != languages.cend(); ++it) { + const QString &code = it.key(); + + if (code == u"osd"_s) { + continue; + } + + QAction *languageAction = m_ocrLanguageMenu->addAction(it.value()); + languageAction->setEnabled(!busy); + + connect(languageAction, &QAction::triggered, this, [this, code]() { + triggerExtraction(code); + }); + } +} + +void ExportMenu::triggerExtraction(const QString &languageCode) +{ + SpectacleCore::instance()->startOcrExtraction(languageCode); +} + #include "moc_ExportMenu.cpp" diff --git a/src/Gui/ExportMenu.h b/src/Gui/ExportMenu.h index e0533a708..bfac0b990 100644 --- a/src/Gui/ExportMenu.h +++ b/src/Gui/ExportMenu.h @@ -9,6 +9,7 @@ #include "SpectacleMenu.h" +#include #include #include "Config.h" @@ -49,8 +50,11 @@ private: Q_SLOT void onImageChanged(); Q_SLOT void openScreenshotsFolder(); + Q_SLOT void buildOcrLanguageSubmenu(); + Q_SLOT void triggerExtraction(const QString &languageCode); void getKServiceItems(); + void createOcrLanguageSubmenu(); #ifdef PURPOSE_FOUND void loadPurposeMenu(); @@ -59,6 +63,7 @@ private: bool mUpdatedImageAvailable; std::unique_ptr mPurposeMenu; #endif + QMenu *m_ocrLanguageMenu = nullptr; friend class ExportMenuSingleton; }; diff --git a/src/Gui/OcrAction.qml b/src/Gui/OcrAction.qml index f887ec0ee..a22efec16 100644 --- a/src/Gui/OcrAction.qml +++ b/src/Gui/OcrAction.qml @@ -6,9 +6,10 @@ import QtQuick.Templates as T import org.kde.spectacle.private T.Action { - // OCR is only available for screenshots, not videos, and only when OCR is properly 
available - enabled: !SpectacleCore.videoMode && SpectacleCore.ocrAvailable + enabled: !SpectacleCore.videoMode && + SpectacleCore.ocrAvailable && + SpectacleCore.ocrStatus !== 1 icon.name: "document-scan" text: i18nc("@action", "Extract Text") - onTriggered: contextWindow.extractText() + onTriggered: SpectacleCore.startOcrExtraction() } diff --git a/src/Gui/SettingsDialog/GeneralOptions.ui b/src/Gui/SettingsDialog/GeneralOptions.ui index ddbbf3e5a..048639b89 100644 --- a/src/Gui/SettingsDialog/GeneralOptions.ui +++ b/src/Gui/SettingsDialog/GeneralOptions.ui @@ -265,21 +265,39 @@ - Language: + Languages for OCR: - - - - 0 - 0 - + + + true - - currentData + + QFrame::StyledPanel + + 120 + + + 60 + + + Qt::ScrollBarAlwaysOff + + + + + 0 + 0 + 69 + 69 + + + + + @@ -383,7 +401,7 @@ kcfg_useReleaseToCapture kcfg_showCaptureInstructions kcfg_rememberSelectionRect - kcfg_ocrLanguage + ocrLanguageScrollArea diff --git a/src/Gui/SettingsDialog/GeneralOptionsPage.cpp b/src/Gui/SettingsDialog/GeneralOptionsPage.cpp index 5b8a5d9fc..f6be13d56 100644 --- a/src/Gui/SettingsDialog/GeneralOptionsPage.cpp +++ b/src/Gui/SettingsDialog/GeneralOptionsPage.cpp @@ -8,19 +8,22 @@ #include "GeneralOptionsPage.h" +#include "OcrLanguageSelector.h" +#include "OcrManager.h" #include "settings.h" #include "ui_GeneralOptions.h" -#include "OcrManager.h" -#include #include +#include -#include #include +using namespace Qt::Literals::StringLiterals; + GeneralOptionsPage::GeneralOptionsPage(QWidget *parent) : QWidget(parent) , m_ui(new Ui_GeneralOptions) + , m_ocrLanguageSelector(new OcrLanguageSelector(this)) { m_ui->setupUi(this); @@ -31,9 +34,12 @@ GeneralOptionsPage::GeneralOptionsPage(QWidget *parent) m_ui->regionTitle->setLevel(2); m_ui->ocrTitle->setLevel(2); - setupOcrLanguageComboBox(); + m_ui->ocrLanguageScrollArea->setWidget(m_ocrLanguageSelector); + m_ui->ocrLanguageScrollArea->setWidgetResizable(true); + + connect(m_ocrLanguageSelector, &OcrLanguageSelector::selectedLanguagesChanged, 
this, &GeneralOptionsPage::ocrLanguageChanged); - connect(OcrManager::instance(), &OcrManager::statusChanged, this, &GeneralOptionsPage::refreshOcrLanguageSettings); + refreshOcrLanguageSettings(); //On Wayland we can't programmatically raise and focus the window so we have to hide the option if (KWindowSystem::isPlatformWayland() || qstrcmp(qgetenv("XDG_SESSION_TYPE").constData(), "wayland") == 0) { @@ -43,71 +49,20 @@ GeneralOptionsPage::GeneralOptionsPage(QWidget *parent) GeneralOptionsPage::~GeneralOptionsPage() = default; -void GeneralOptionsPage::setupOcrLanguageComboBox() -{ - OcrManager *ocrManager = OcrManager::instance(); - - if (!ocrManager->isAvailable()) { - m_ui->kcfg_ocrLanguage->setEnabled(false); - m_ui->kcfg_ocrLanguage->addItem(i18n("OCR not available")); - m_ui->ocrLanguageLabel->setVisible(false); - m_ui->kcfg_ocrLanguage->setVisible(false); - m_ui->ocrUnavailableWidget->setVisible(true); - return; - } - - const auto availableLanguages = ocrManager->availableLanguagesWithNames(); - - if (availableLanguages.isEmpty()) { - m_ui->kcfg_ocrLanguage->addItem(i18n("No languages found")); - m_ui->kcfg_ocrLanguage->setEnabled(false); - return; - } - - m_ui->kcfg_ocrLanguage->clear(); - m_ui->ocrLanguageLabel->setVisible(true); - m_ui->kcfg_ocrLanguage->setVisible(true); - m_ui->ocrUnavailableWidget->setVisible(false); - - for (auto it = availableLanguages.constBegin(); it != availableLanguages.constEnd(); ++it) { - m_ui->kcfg_ocrLanguage->addItem(it.value(), it.key()); - } -} - void GeneralOptionsPage::refreshOcrLanguageSettings() { OcrManager *ocrManager = OcrManager::instance(); if (!ocrManager->isAvailable()) { m_ui->ocrLanguageLabel->setVisible(false); - m_ui->kcfg_ocrLanguage->setVisible(false); + m_ui->ocrLanguageScrollArea->setVisible(false); m_ui->ocrUnavailableWidget->setVisible(true); - return; - } - - const auto availableLanguages = ocrManager->availableLanguagesWithNames(); - - if (availableLanguages.isEmpty()) { - return; - } - - 
m_ui->kcfg_ocrLanguage->clear(); - m_ui->kcfg_ocrLanguage->setEnabled(true); - m_ui->ocrLanguageLabel->setVisible(true); - m_ui->kcfg_ocrLanguage->setVisible(true); - m_ui->ocrUnavailableWidget->setVisible(false); - - for (auto it = availableLanguages.constBegin(); it != availableLanguages.constEnd(); ++it) { - m_ui->kcfg_ocrLanguage->addItem(it.value(), it.key()); - } - - const QString currentLanguage = Settings::ocrLanguage(); - - for (int i = 0; i < m_ui->kcfg_ocrLanguage->count(); ++i) { - if (m_ui->kcfg_ocrLanguage->itemData(i).toString() == currentLanguage) { - m_ui->kcfg_ocrLanguage->setCurrentIndex(i); - break; - } + } else { + m_ui->ocrLanguageLabel->setVisible(true); + m_ui->ocrLanguageScrollArea->setVisible(true); + m_ui->ocrUnavailableWidget->setVisible(false); + + m_ocrLanguageSelector->refresh(); } } diff --git a/src/Gui/SettingsDialog/GeneralOptionsPage.h b/src/Gui/SettingsDialog/GeneralOptionsPage.h index c184d6ba8..a3a5cb17d 100644 --- a/src/Gui/SettingsDialog/GeneralOptionsPage.h +++ b/src/Gui/SettingsDialog/GeneralOptionsPage.h @@ -11,6 +11,7 @@ #include class Ui_GeneralOptions; +class OcrLanguageSelector; class GeneralOptionsPage : public QWidget { @@ -22,10 +23,21 @@ public: void refreshOcrLanguageSettings(); + /** + * @brief Get direct access to the OCR language selector widget + * @return Pointer to the OcrLanguageSelector widget for direct manipulation + */ + OcrLanguageSelector *ocrLanguageSelector() const + { + return m_ocrLanguageSelector; + } + +Q_SIGNALS: + void ocrLanguageChanged(); + private: - void setupOcrLanguageComboBox(); - QScopedPointer m_ui; + OcrLanguageSelector *m_ocrLanguageSelector; }; #endif // GENERALOPTIONSPAGE_H diff --git a/src/Gui/SettingsDialog/OcrLanguageSelector.cpp b/src/Gui/SettingsDialog/OcrLanguageSelector.cpp new file mode 100644 index 000000000..d1d809323 --- /dev/null +++ b/src/Gui/SettingsDialog/OcrLanguageSelector.cpp @@ -0,0 +1,271 @@ +/* + * SPDX-FileCopyrightText: 2025 Jhair Paris + * + * 
SPDX-License-Identifier: LGPL-2.0-or-later + */ + +#include "OcrLanguageSelector.h" +#include "OcrManager.h" +#include "settings.h" +#include "spectacle_debug.h" + +#include + +#include +#include + +using namespace Qt::Literals::StringLiterals; + +OcrLanguageSelector::OcrLanguageSelector(QWidget *parent) + : QWidget(parent) + , m_layout(new QVBoxLayout(this)) + , m_blockSignals(false) + , m_ocrManager(OcrManager::instance()) +{ + m_layout->setContentsMargins(0, 0, 0, 0); + m_layout->setSpacing(0); + setContentsMargins(0, 0, 0, 0); + + setupLanguageCheckboxes(); + + connect(m_ocrManager, &OcrManager::statusChanged, this, &OcrLanguageSelector::onOcrManagerStatusChanged); +} + +OcrLanguageSelector::~OcrLanguageSelector() = default; + +QStringList OcrLanguageSelector::selectedLanguages() const +{ + QStringList result; + for (QCheckBox *checkbox : m_languageCheckboxes) { + if (checkbox->isChecked()) { + result.append(checkbox->property("languageCode").toString()); + } + } + return result; +} + +void OcrLanguageSelector::setSelectedLanguages(const QStringList &languages) +{ + m_blockSignals = true; + + for (QCheckBox *checkbox : m_languageCheckboxes) { + const QString langCode = checkbox->property("languageCode").toString(); + checkbox->setChecked(languages.contains(langCode)); + } + + m_blockSignals = false; + + enforceSelectionLimits(); +} + +bool OcrLanguageSelector::isDefault() const +{ + const QStringList current = selectedLanguages(); + + // Default state is exactly one language selected + if (current.size() != 1) { + return false; + } + + // Check if it's English (preferred default) + for (const QCheckBox *checkbox : m_languageCheckboxes) { + if (checkbox->property("languageCode").toString() == u"eng"_s) { + // English is available, so default is English + return current.contains(u"eng"_s); + } + } + + // English not available, default is the first available language + if (!m_languageCheckboxes.isEmpty()) { + QString firstLangCode = 
m_languageCheckboxes.first()->property("languageCode").toString(); + return current.contains(firstLangCode); + } + + return false; +} + +bool OcrLanguageSelector::hasChanges() const +{ + return selectedLanguages() != Settings::ocrLanguages(); +} + +void OcrLanguageSelector::applyDefaults() +{ + if (!m_languageCheckboxes.isEmpty()) { + m_blockSignals = true; + + for (QCheckBox *checkbox : m_languageCheckboxes) { + checkbox->setChecked(false); + } + + // Try to select English first + bool foundDefault = false; + for (QCheckBox *checkbox : m_languageCheckboxes) { + if (checkbox->property("languageCode").toString() == u"eng"_s) { + checkbox->setChecked(true); + foundDefault = true; + break; + } + } + + // If English not available, select first language + if (!foundDefault) { + m_languageCheckboxes.first()->setChecked(true); + } + + m_blockSignals = false; + + const QStringList selected = selectedLanguages(); + Settings::setOcrLanguages(selected); + + // Emit signal to notify changes + Q_EMIT selectedLanguagesChanged(selected); + } +} + +void OcrLanguageSelector::refresh() +{ + setupLanguageCheckboxes(); +} + +void OcrLanguageSelector::saveSettings() +{ + const QStringList selected = selectedLanguages(); + Settings::setOcrLanguages(selected); +} + +void OcrLanguageSelector::updateWidgets() +{ + const QStringList savedLanguages = Settings::ocrLanguages(); + setSelectedLanguages(savedLanguages); +} + +void OcrLanguageSelector::onLanguageCheckboxChanged() +{ + if (m_blockSignals) { + return; + } + + enforceSelectionLimits(); + + const QStringList selected = selectedLanguages(); + Q_EMIT selectedLanguagesChanged(selected); +} + +void OcrLanguageSelector::onOcrManagerStatusChanged() +{ + refresh(); +} + +void OcrLanguageSelector::setupLanguageCheckboxes() +{ + while (QLayoutItem *item = m_layout->takeAt(0)) { + if (auto widget = item->widget()) { + widget->deleteLater(); + } + delete item; + } + + m_languageCheckboxes.clear(); + m_availableLanguages.clear(); + + if 
(!m_ocrManager || !m_ocrManager->isAvailable()) { + qCWarning(SPECTACLE_LOG) << "OCR is not available; language selector will remain empty."; + return; + } + + m_availableLanguages = m_ocrManager->availableLanguagesWithNames(); + + if (m_availableLanguages.isEmpty()) { + qCWarning(SPECTACLE_LOG) << "No OCR language data available."; + return; + } + + for (auto it = m_availableLanguages.cbegin(); it != m_availableLanguages.cend(); ++it) { + const QString &langCode = it.key(); + if (langCode == u"osd"_s) { + continue; + } + + QCheckBox *checkbox = new QCheckBox(it.value(), this); + checkbox->setProperty("languageCode", langCode); + connect(checkbox, &QCheckBox::toggled, this, &OcrLanguageSelector::onLanguageCheckboxChanged); + m_layout->addWidget(checkbox); + m_languageCheckboxes.append(checkbox); + } + + if (m_layout->count() > 0) { + m_layout->addStretch(); + } + + const QStringList savedLanguages = Settings::ocrLanguages(); + setSelectedLanguages(savedLanguages); + + if (savedLanguages.isEmpty() && !m_languageCheckboxes.isEmpty()) { + applyDefaults(); + } +} + +void OcrLanguageSelector::enforceSelectionLimits() +{ + const QStringList selected = selectedLanguages(); + const int count = selected.size(); + + if (count > OcrManager::MAX_OCR_LANGUAGES) { // Max languages for performance + for (int i = m_languageCheckboxes.size() - 1; i >= 0; --i) { + QCheckBox *checkbox = m_languageCheckboxes[i]; + if (checkbox->isChecked()) { + blockSignalsAndSetChecked(checkbox, false); + break; + } + } + } + + updateCheckboxEnabledStates(); + + if (selectedLanguages().size() == 0 && !m_languageCheckboxes.isEmpty()) { + applyDefaults(); + } +} + +QString OcrLanguageSelector::getDefaultLanguageCode() const +{ + if (m_languageCheckboxes.isEmpty()) { + return QString(); + } + + // Try English first + for (const QCheckBox *checkbox : m_languageCheckboxes) { + if (checkbox->property("languageCode").toString() == u"eng"_s) { + return u"eng"_s; + } + } + + // Fallback to first available + 
return m_languageCheckboxes.first()->property("languageCode").toString(); +} + +void OcrLanguageSelector::updateCheckboxEnabledStates() +{ + const QStringList selected = selectedLanguages(); + const int count = selected.size(); + + // If we have max languages selected, disable all unchecked checkboxes + // If we have less than max, enable all checkboxes + for (QCheckBox *checkbox : m_languageCheckboxes) { + if (checkbox->isChecked()) { + checkbox->setEnabled(true); + } else { + checkbox->setEnabled(count < OcrManager::MAX_OCR_LANGUAGES); + } + } +} + +void OcrLanguageSelector::blockSignalsAndSetChecked(QCheckBox *checkbox, bool checked) +{ + m_blockSignals = true; + checkbox->setChecked(checked); + m_blockSignals = false; +} + +#include "moc_OcrLanguageSelector.cpp" \ No newline at end of file diff --git a/src/Gui/SettingsDialog/OcrLanguageSelector.h b/src/Gui/SettingsDialog/OcrLanguageSelector.h new file mode 100644 index 000000000..59b1a3d42 --- /dev/null +++ b/src/Gui/SettingsDialog/OcrLanguageSelector.h @@ -0,0 +1,111 @@ +/* + * SPDX-FileCopyrightText: 2025 Jhair Paris + * + * SPDX-License-Identifier: LGPL-2.0-or-later + */ + +#ifndef OCRLANGUAGESELECTOR_H +#define OCRLANGUAGESELECTOR_H + +#include +#include +#include + +class OcrManager; + +/** + * @brief Specialized widget for OCR language selection with multi-language support + * + * This widget encapsulates all the logic for OCR language selection: + * - Displays available languages as checkboxes (excluding 'osd') + * - Enforces limits: minimum 1, maximum languages defined by OcrManager + * - Handles defaults: English preferred, fallback to first available + * - Follows KConfigDialog pattern: no auto-persistence, explicit save/update methods + * - Updates dynamically when OCR manager state changes + */ +class OcrLanguageSelector : public QWidget +{ + Q_OBJECT + Q_PROPERTY(QStringList selectedLanguages READ selectedLanguages WRITE setSelectedLanguages NOTIFY selectedLanguagesChanged USER true) + 
Q_PROPERTY(bool isDefault READ isDefault NOTIFY selectedLanguagesChanged) + Q_PROPERTY(bool hasChanges READ hasChanges NOTIFY selectedLanguagesChanged) + +public: + explicit OcrLanguageSelector(QWidget *parent = nullptr); + ~OcrLanguageSelector() override; + + /** + * @brief Get currently selected language codes + * @return List of selected language codes (e.g., ["eng", "spa"]) + */ + QStringList selectedLanguages() const; + + /** + * @brief Set selected languages + * @param languages List of language codes to select + */ + void setSelectedLanguages(const QStringList &languages); + + /** + * @brief Check if current selection is the default state + * @return true if selection represents default configuration + */ + bool isDefault() const; + + /** + * @brief Check if there are unsaved changes + * @return true if current selection differs from saved configuration + */ + bool hasChanges() const; + + /** + * @brief Apply default language selection + * Selects English if available, otherwise first available language + */ + void applyDefaults(); + + /** + * @brief Refresh the widget when OCR manager state changes + * Rebuilds checkboxes based on current available languages + */ + void refresh(); + + /** + * @brief Save current selection to settings (called by KConfigDialog) + * Follows KConfigDialog pattern for saving changes + */ + void saveSettings(); + + /** + * @brief Update widget to reflect current settings (called by KConfigDialog) + * Reloads settings when user cancels or dialog is reopened + */ + void updateWidgets(); + +Q_SIGNALS: + /** + * @brief Emitted when language selection changes + * @param languages New list of selected languages + */ + void selectedLanguagesChanged(const QStringList &languages); + +private Q_SLOTS: + void onLanguageCheckboxChanged(); + void onOcrManagerStatusChanged(); + +private: + void setupLanguageCheckboxes(); + void enforceSelectionLimits(); + void updateCheckboxEnabledStates(); + QString getDefaultLanguageCode() const; + void 
blockSignalsAndSetChecked(QCheckBox *checkbox, bool checked); + + QVBoxLayout *m_layout; + QList m_languageCheckboxes; + QMap m_availableLanguages; // code -> display name + bool m_blockSignals; + + OcrManager *m_ocrManager; +}; + +#endif // OCRLANGUAGESELECTOR_H \ No newline at end of file diff --git a/src/Gui/SettingsDialog/SettingsDialog.cpp b/src/Gui/SettingsDialog/SettingsDialog.cpp index a19a47627..532bfd3c3 100644 --- a/src/Gui/SettingsDialog/SettingsDialog.cpp +++ b/src/Gui/SettingsDialog/SettingsDialog.cpp @@ -1,4 +1,5 @@ /* + * SPDX-FileCopyrightText: 2025 Jhair Paris * SPDX-FileCopyrightText: 2019 David Redondo * SPDX-FileCopyrightText: 2015 Boudhayan Gupta * @@ -9,8 +10,9 @@ #include "GeneralOptionsPage.h" #include "ImageSaveOptionsPage.h" -#include "VideoSaveOptionsPage.h" +#include "OcrLanguageSelector.h" #include "ShortcutsOptionsPage.h" +#include "VideoSaveOptionsPage.h" #include "settings.h" #include @@ -38,6 +40,9 @@ SettingsDialog::SettingsDialog(QWidget *parent) connect(m_shortcutsPage, &ShortcutsOptionsPage::shortCutsChanged, this, [this] { updateButtons(); }); + connect(m_generalPage, &GeneralOptionsPage::ocrLanguageChanged, this, [this] { + updateButtons(); + }); connect(this, &KConfigDialog::currentPageChanged, this, &SettingsDialog::updateButtons); } @@ -72,18 +77,20 @@ void SettingsDialog::showEvent(QShowEvent *event) bool SettingsDialog::hasChanged() { - return m_shortcutsPage->isModified() || KConfigDialog::hasChanged(); + return m_shortcutsPage->isModified() || m_generalPage->ocrLanguageSelector()->hasChanges() || KConfigDialog::hasChanged(); } bool SettingsDialog::isDefault() { - return currentPage()->name() != i18n("Shortcuts") && KConfigDialog::isDefault(); + return currentPage()->name() != i18n("Shortcuts") && m_generalPage->ocrLanguageSelector()->isDefault() && KConfigDialog::isDefault(); } void SettingsDialog::updateSettings() { KConfigDialog::updateSettings(); m_shortcutsPage->saveChanges(); + + 
m_generalPage->ocrLanguageSelector()->saveSettings(); } void SettingsDialog::updateWidgets() @@ -91,6 +98,7 @@ void SettingsDialog::updateWidgets() KConfigDialog::updateWidgets(); m_shortcutsPage->resetChanges(); + m_generalPage->ocrLanguageSelector()->updateWidgets(); m_generalPage->refreshOcrLanguageSettings(); } @@ -98,6 +106,9 @@ void SettingsDialog::updateWidgetsDefault() { KConfigDialog::updateWidgetsDefault(); m_shortcutsPage->defaults(); + + m_generalPage->ocrLanguageSelector()->applyDefaults(); + m_generalPage->refreshOcrLanguageSettings(); } #include "moc_SettingsDialog.cpp" diff --git a/src/Gui/SettingsDialog/spectacle.kcfg b/src/Gui/SettingsDialog/spectacle.kcfg index 4517e2344..2062f7cc4 100644 --- a/src/Gui/SettingsDialog/spectacle.kcfg +++ b/src/Gui/SettingsDialog/spectacle.kcfg @@ -70,8 +70,8 @@ UntilClosed - - + + eng diff --git a/src/Gui/ViewerPage.qml b/src/Gui/ViewerPage.qml index 602e4431b..133793964 100644 --- a/src/Gui/ViewerPage.qml +++ b/src/Gui/ViewerPage.qml @@ -61,11 +61,13 @@ EmptyPage { visible: action.enabled action: CopyImageAction {} } + TtToolButton { display: TtToolButton.IconOnly - visible: action.enabled && SpectacleCore.ocrAvailable + visible: !SpectacleCore.videoMode && SpectacleCore.ocrAvailable action: OcrAction {} } + // We only show this in video mode to save space in screenshot mode TtToolButton { visible: SpectacleCore.videoMode -- GitLab