From 9c8e7c6f52a4f088ae406ea9dc88247d97e68f61 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Valent=C3=ADn=20Kivachuk?=
Date: Sun, 14 Jul 2019 17:45:45 +0200
Subject: [PATCH] tesseract: add package
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Tesseract is an open source text recognizer (OCR) Engine, available under the
Apache 2.0 license. It can be used directly, or (for programmers) using an API
to extract printed text from images. It supports a wide variety of languages.

Signed-off-by: Valentín Kivachuk
---
 utils/tessdata/Makefile  | 65 +++++++++++++++++++++++++++++++++++++
 utils/tesseract/Makefile | 69 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 134 insertions(+)
 create mode 100644 utils/tessdata/Makefile
 create mode 100644 utils/tesseract/Makefile

diff --git a/utils/tessdata/Makefile b/utils/tessdata/Makefile
new file mode 100644
index 000000000..c0fa83d52
--- /dev/null
+++ b/utils/tessdata/Makefile
@@ -0,0 +1,65 @@
+# Copyright (C) 2019 Valentín Kivachuk
+#
+# This is free software, licensed under the GNU General Public License v2.
+# See /LICENSE for more information.
+#
+
+include $(TOPDIR)/rules.mk
+
+PKG_NAME:=tessdata
+PKG_VERSION:=4.0.0
+PKG_RELEASE:=1
+
+PKG_SOURCE:=$(PKG_NAME)-$(PKG_VERSION).tar.gz
+PKG_SOURCE_URL:=https://codeload.github.com/tesseract-ocr/tessdata/tar.gz/$(PKG_VERSION)?
+PKG_HASH:=38c637d3a1763f6c3d32e8f1d979f045668676ec5feb8ee1869ee77cedd31b08
+
+PKG_MAINTAINER:=Valentín Kivachuk
+PKG_LICENSE:=Apache-2.0
+PKG_LICENSE_FILES:=COPYING
+
+PKG_INSTALL:=1
+
+# The tarball holds about 1.5 GB of data, so it is never unpacked into the
+# build directory: PKG_UNPACK stays empty and each package's install step
+# extracts only its own .traineddata file from the downloaded archive.
+PKG_UNPACK:=
+
+include $(INCLUDE_DIR)/package.mk
+
+# One tesseract-data-LANG package is generated per entry in this list.
+ALLTESSERACTLANG:=afr amh ara asm aze aze_cyrl bel ben bod bos bre bul cat ceb ces chi_sim chi_sim_vert chi_tra chi_tra_vert chr cos cym dan dan_frak deu deu_frak div dzo ell eng enm epo equ est eus fao fas fil fin fra frk frm fry gla gle glg grc guj hat heb hin hrv hun hye iku ind isl ita ita_old jav jpn jpn_vert kan kat kat_old kaz khm kir kor kor_vert kur kur_ara lao lat lav lit ltz mal mar mkd mlt mon mri msa mya nep nld nor oci ori osd pan pol por pus que ron rus san sin slk slk_frak slv snd spa spa_old sqi srp srp_latn sun swa swe syr tam tat tel tgk tgl tha tir ton tur uig ukr urd uzb uzb_cyrl vie yid yor
+
+# Nothing to compile or stage: the install recipes read the tarball directly.
+define Build/Compile
+endef
+
+define Build/Install
+endef
+
+# Fields shared by every generated tesseract-data-LANG package.
+define Package/tesseract-data-default
+  SUBMENU:=Tesseract
+  SECTION:=utils
+  CATEGORY:=Utilities
+  DEPENDS:=tesseract
+endef
+
+# Template for one language package; $(1) is the language code. The text is
+# expanded by $(call) and then parsed by $(eval), so $$(1) is written with a
+# doubled dollar to reach the generated install recipe as a literal $(1).
+define generate-tesseract-data-package
+  define Package/tesseract-data-$(1)
+    TITLE:=Tesseract training data for $(1) language
+    $(call Package/tesseract-data-default)
+  endef
+
+  define Package/tesseract-data-$(1)/install
+	$(INSTALL_DIR) $$(1)/usr/share/tessdata
+	$(TAR) --strip-components=1 -C $$(1)/usr/share/tessdata/ -xvf $(DL_DIR)/$(PKG_SOURCE) $(PKG_NAME)-$(PKG_VERSION)/$(1).traineddata
+  endef
+
+endef
+
+$(foreach LANG,$(ALLTESSERACTLANG),$(eval $(call generate-tesseract-data-package,$(LANG))))
+$(foreach LANG,$(ALLTESSERACTLANG),$(eval $(call BuildPackage,tesseract-data-$(LANG))))
diff --git a/utils/tesseract/Makefile b/utils/tesseract/Makefile
new file mode 100644
index 000000000..dc8d2a459
--- /dev/null
+++ b/utils/tesseract/Makefile
@@ -0,0 +1,69 @@
+# Copyright (C) 2019 Valentin Kivachuk
+#
+# This is free software, licensed under the GNU General Public License v2.
+# See /LICENSE for more information.
+#
+
+include $(TOPDIR)/rules.mk
+
+PKG_NAME:=tesseract
+PKG_VERSION:=4.0.0
+PKG_RELEASE:=1
+
+PKG_SOURCE:=$(PKG_NAME)-$(PKG_VERSION).tar.gz
+PKG_SOURCE_URL:=https://codeload.github.com/tesseract-ocr/tesseract/tar.gz/$(PKG_VERSION)?
+PKG_HASH:=a1f5422ca49a32e5f35c54dee5112b11b99928fc9f4ee6695cdc6768d69f61dd
+
+PKG_MAINTAINER:=Valentin Kivachuk
+PKG_LICENSE:=Apache-2.0
+PKG_LICENSE_FILES:=LICENSE
+
+PKG_BUILD_PARALLEL:=1
+PKG_FIXUP:=autoreconf
+PKG_INSTALL:=1
+
+include $(INCLUDE_DIR)/package.mk
+
+# Let the linker look in staging for libraries needed by other libraries.
+TARGET_LDFLAGS += -Wl,-rpath-link=$(STAGING_DIR)/usr/lib
+
+# Drop any -O level already present in the target flags and build with -O3.
+TARGET_CFLAGS:=$(filter-out -O%,$(TARGET_CFLAGS)) -O3
+
+define Package/tesseract
+  MENU:=1
+  SECTION:=utils
+  CATEGORY:=Utilities
+  TITLE:=Tesseract Open Source OCR Engine
+  URL:=https://github.com/tesseract-ocr/tesseract
+  DEPENDS:=+libleptonica +libpthread +libstdcpp
+endef
+
+define Package/tesseract/description
+  Tesseract is an open source text recognizer (OCR) Engine, available under
+  the Apache 2.0 license. It can be used directly, or (for programmers) using
+  an API to extract printed text from images. It supports a wide variety of
+  languages.
+endef
+
+# Copy headers, libtesseract and its pkg-config file below $(1).
+define Build/InstallDev
+	$(INSTALL_DIR) $(1)/usr/include
+	$(CP) $(PKG_INSTALL_DIR)/usr/include/tesseract $(1)/usr/include/
+	$(INSTALL_DIR) $(1)/usr/lib
+	$(CP) $(PKG_INSTALL_DIR)/usr/lib/libtesseract.{a,so*} $(1)/usr/lib/
+	$(INSTALL_DIR) $(1)/usr/lib/pkgconfig
+	$(CP) $(PKG_INSTALL_DIR)/usr/lib/pkgconfig/tesseract.pc $(1)/usr/lib/pkgconfig/
+endef
+
+# Copy the binaries, versioned shared libraries and usr/share below $(1).
+define Package/tesseract/install
+	$(INSTALL_DIR) $(1)/usr/bin
+	$(CP) $(PKG_INSTALL_DIR)/usr/bin/* $(1)/usr/bin/
+	$(INSTALL_DIR) $(1)/usr/lib
+	$(CP) $(PKG_INSTALL_DIR)/usr/lib/lib*.so.* $(1)/usr/lib/
+	$(INSTALL_DIR) $(1)/usr/share
+	$(CP) $(PKG_INSTALL_DIR)/usr/share/* $(1)/usr/share/
+endef
+
+$(eval $(call BuildPackage,tesseract))