tesseract-lang 4.0.0 (new formula)
Introduces a formula that contains all the language data files for tesseract. The tesseract formula is patched to ship only the data files it needs by default (eng, osd and snum); all other languages now come from tesseract-lang.
This commit is contained in:
parent
6b4a6e3f51
commit
e7ff660ca7
2 changed files with 43 additions and 8 deletions
26
Formula/tesseract-lang.rb
Normal file
26
Formula/tesseract-lang.rb
Normal file
|
@ -0,0 +1,26 @@
|
|||
# Homebrew formula that installs the language data files from the
# tessdata_fast 4.0.0 archive into share/tessdata for use by tesseract.
class TesseractLang < Formula
  desc "Enables extra languages support for Tesseract"
  homepage "https://github.com/tesseract-ocr/tessdata_fast/"
  url "https://github.com/tesseract-ocr/tessdata_fast/archive/4.0.0.tar.gz"
  sha256 "f1b71e97f27bafffb6a730ee66fd9dc021afc38f318fdc80a464a84a519227fe"

  depends_on "tesseract"

  # Sample image consumed by the test block; the URL is pinned to a fixed
  # commit and verified by checksum.
  resource "testfile" do
    url "https://raw.githubusercontent.com/tesseract-ocr/test/6dd816cdaf3e76153271daf773e562e24c928bf5/testing/eurotext.tif"
    sha256 "7b9bd14aba7d5e30df686fbb6f71782a97f48f81b32dc201a1b75afe6de747d6"
  end

  # Installs everything in the unpacked archive, except the eng and osd data
  # files, into share/tessdata.
  def install
    # NOTE(review): presumably removed because the tesseract formula already
    # installs its own eng/osd data and the files would conflict -- confirm.
    rm %w[eng.traineddata osd.traineddata]
    (share/"tessdata").install Dir["*"]
  end

  # Runs OCR on the sample image with English + German data and checks that
  # a line containing non-ASCII German text was recognized.
  test do
    resource("testfile").stage do
      system "#{Formula["tesseract"].bin}/tesseract", "./eurotext.tif", "./output", "-l", "eng+deu"
      # tesseract appends ".txt" to the output base name; read the file
      # directly instead of spawning a shell just to `cat` it.
      assert_match "über den faulen Hund. Le renard brun\n", File.read("output.txt")
    end
  end
end
|
|
@ -21,11 +21,6 @@ class Tesseract < Formula
|
|||
depends_on "leptonica"
|
||||
depends_on "libtiff"
|
||||
|
||||
resource "tessdata" do
|
||||
url "https://github.com/tesseract-ocr/tessdata_fast/archive/4.0.0.tar.gz"
|
||||
sha256 "f1b71e97f27bafffb6a730ee66fd9dc021afc38f318fdc80a464a84a519227fe"
|
||||
end
|
||||
|
||||
resource "eng" do
|
||||
url "https://github.com/tesseract-ocr/tessdata_fast/raw/4.0.0/eng.traineddata"
|
||||
sha256 "7d4322bd2a7749724879683fc3912cb542f19906c83bcc1a52132556427170b2"
|
||||
|
@ -49,15 +44,29 @@ class Tesseract < Formula
|
|||
ENV.cxx11
|
||||
|
||||
system "./autogen.sh"
|
||||
system "./configure", "--prefix=#{prefix}", "--disable-dependency-tracking"
|
||||
system "./configure", "--prefix=#{prefix}", "--disable-dependency-tracking", "--datarootdir=#{HOMEBREW_PREFIX}/share"
|
||||
|
||||
system "make"
|
||||
|
||||
inreplace "tessdata/Makefile", "datarootdir = #{HOMEBREW_PREFIX}/share", "datarootdir = #{share}"
|
||||
inreplace "tessdata/configs/Makefile", "datarootdir = #{HOMEBREW_PREFIX}/share", "datarootdir = #{share}"
|
||||
inreplace "tessdata/tessconfigs/Makefile", "datarootdir = #{HOMEBREW_PREFIX}/share", "datarootdir = #{share}"
|
||||
|
||||
system "make", "install"
|
||||
|
||||
resource("snum").stage { mv "snum.traineddata", share/"tessdata" }
|
||||
resource("tessdata").stage { mv Dir["*"], share/"tessdata" }
|
||||
resource("eng").stage { mv "eng.traineddata", share/"tessdata" }
|
||||
resource("osd").stage { mv "osd.traineddata", share/"tessdata" }
|
||||
end
|
||||
|
||||
# Checks that the installed binary reports the formula version, then runs OCR
# on a sample image using the English data and compares the first output line.
test do
  assert_match version.to_s, shell_output("#{bin}/tesseract -v 2>&1")

  # Pin the test repository to a fixed commit so the sample image (and thus
  # the expected OCR output) cannot change underneath the test.
  resource "tests" do
    url "https://github.com/tesseract-ocr/test.git",
        revision: "6dd816cdaf3e76153271daf773e562e24c928bf5"
  end

  resource("tests").stage do
    system "#{bin}/tesseract", "./testing/eurotext.tif", "./output", "-l", "eng"
    # Only the first recognized line is compared; read it directly rather
    # than shelling out to `sed -n 1p`.
    assert_equal "The (quick) [brown] {fox} jumps!\n", File.open("output.txt", &:gets)
  end
end
|
||||
end
|
||||
|
|
Loading…
Reference in a new issue