tesseract-lang 4.0.0 (new formula)

Introduces a formula that contains all the language data files for Tesseract.
The tesseract formula is patched to ship only the English (eng) data plus the other data files it needs (osd, snum).
This commit is contained in:
Alberto Sottile 2019-02-14 18:34:13 +01:00 committed by FX Coudert
parent 6b4a6e3f51
commit e7ff660ca7
2 changed files with 43 additions and 8 deletions

26
Formula/tesseract-lang.rb Normal file
View file

@ -0,0 +1,26 @@
# Homebrew formula that supplies the Tesseract language data files contained
# in the tessdata_fast 4.0.0 archive.
class TesseractLang < Formula
  desc "Enables extra languages support for Tesseract"
  homepage "https://github.com/tesseract-ocr/tessdata_fast/"
  url "https://github.com/tesseract-ocr/tessdata_fast/archive/4.0.0.tar.gz"
  sha256 "f1b71e97f27bafffb6a730ee66fd9dc021afc38f318fdc80a464a84a519227fe"

  depends_on "tesseract"

  # Sample image used only by the test block; the URL is pinned to a fixed
  # commit of the upstream test repository.
  resource "testfile" do
    url "https://raw.githubusercontent.com/tesseract-ocr/test/6dd816cdaf3e76153271daf773e562e24c928bf5/testing/eurotext.tif"
    sha256 "7b9bd14aba7d5e30df686fbb6f71782a97f48f81b32dc201a1b75afe6de747d6"
  end

  # Installs every file of the unpacked archive into share/tessdata, except
  # the two data files removed first.
  def install
    # eng and osd are dropped so this formula does not install them.
    # NOTE(review): presumably because the tesseract formula already ships
    # both files and they would otherwise collide — confirm.
    rm %w[eng.traineddata osd.traineddata]
    (share/"tessdata").install Dir["*"]
  end

  # Runs the tesseract binary from the dependency on the sample image with
  # English and German enabled, then checks the recognized text.
  test do
    resource("testfile").stage do
      system "#{Formula["tesseract"].bin}/tesseract", "./eurotext.tif", "./output", "-l", "eng+deu"
      # Read the result directly instead of spawning `cat` through a shell.
      assert_match "über den faulen Hund. Le renard brun\n", File.read("output.txt")
    end
  end
end

View file

@ -21,11 +21,6 @@ class Tesseract < Formula
depends_on "leptonica"
depends_on "libtiff"
resource "tessdata" do
url "https://github.com/tesseract-ocr/tessdata_fast/archive/4.0.0.tar.gz"
sha256 "f1b71e97f27bafffb6a730ee66fd9dc021afc38f318fdc80a464a84a519227fe"
end
resource "eng" do
url "https://github.com/tesseract-ocr/tessdata_fast/raw/4.0.0/eng.traineddata"
sha256 "7d4322bd2a7749724879683fc3912cb542f19906c83bcc1a52132556427170b2"
@ -49,15 +44,29 @@ class Tesseract < Formula
ENV.cxx11
system "./autogen.sh"
system "./configure", "--prefix=#{prefix}", "--disable-dependency-tracking"
system "./configure", "--prefix=#{prefix}", "--disable-dependency-tracking", "--datarootdir=#{HOMEBREW_PREFIX}/share"
system "make"
inreplace "tessdata/Makefile", "datarootdir = #{HOMEBREW_PREFIX}/share", "datarootdir = #{share}"
inreplace "tessdata/configs/Makefile", "datarootdir = #{HOMEBREW_PREFIX}/share", "datarootdir = #{share}"
inreplace "tessdata/tessconfigs/Makefile", "datarootdir = #{HOMEBREW_PREFIX}/share", "datarootdir = #{share}"
system "make", "install"
resource("snum").stage { mv "snum.traineddata", share/"tessdata" }
resource("tessdata").stage { mv Dir["*"], share/"tessdata" }
resource("eng").stage { mv "eng.traineddata", share/"tessdata" }
resource("osd").stage { mv "osd.traineddata", share/"tessdata" }
end
# Smoke test: checks the reported version, then runs OCR on a sample image
# using the English data.
test do
# The output of `tesseract -v` (stderr merged into stdout) must contain the formula version.
assert_match version.to_s, shell_output("#{bin}/tesseract -v 2>&1")
# Sample images are fetched from the upstream test repository.
# NOTE(review): the git URL carries no tag or revision, so the fixture looks unpinned — confirm this is intended.
resource "tests" do
url "https://github.com/tesseract-ocr/test.git"
end
resource("tests").stage do
# Recognize eurotext.tif with "-l eng"; the result is read back from ./output.txt below.
system "#{bin}/tesseract", "./testing/eurotext.tif", "./output", "-l", "eng"
# Only the first line of the recognized text is compared (`sed -n 1p`).
assert_equal "The (quick) [brown] {fox} jumps!\n", shell_output("sed -n 1p ./output.txt")
end
end
end