homebrew-core/Formula/tesseract.rb
2019-10-01 09:02:16 +02:00

77 lines
2.9 KiB
Ruby

# Homebrew formula for Tesseract 4.1.0. Builds the engine from the release
# tarball and ships the "eng", "osd", and "snum" trained-data files with it.
class Tesseract < Formula
  desc "OCR (Optical Character Recognition) engine"
  homepage "https://github.com/tesseract-ocr/"
  url "https://github.com/tesseract-ocr/tesseract/archive/4.1.0.tar.gz"
  sha256 "5c5ed5f1a76888dc57a83704f24ae02f8319849f5c4cf19d254296978a1a1961"
  head "https://github.com/tesseract-ocr/tesseract.git"

  # Bottle checksums, one per supported macOS release.
  bottle do
    cellar :any
    sha256 "e28683663b237b02037c5e54d452e54cc881029ed0f5e25a885ba6bb804440e2" => :catalina
    sha256 "774212dc0edf27447e1d5ba46ae9d3dfb989a90b529e49d15df99b9bb16e73d7" => :mojave
    sha256 "eb9d24e2550bc7f13826306de27ceec45c2ddbde3440976a695c33daefbb204d" => :high_sierra
    sha256 "d118c493ae35173a0eb966dc7641887dc9670772561b7e6ba25bccf4bdaab384" => :sierra
  end

  # Build-only tooling (autogen.sh is run during install).
  depends_on "autoconf" => :build
  depends_on "autoconf-archive" => :build
  depends_on "automake" => :build
  depends_on "libtool" => :build
  depends_on "pkg-config" => :build

  # Libraries needed beyond build time.
  depends_on "leptonica"
  depends_on "libtiff"

  # Trained data taken from the tessdata_fast 4.0.0 tag.
  resource "eng" do
    url "https://github.com/tesseract-ocr/tessdata_fast/raw/4.0.0/eng.traineddata"
    sha256 "7d4322bd2a7749724879683fc3912cb542f19906c83bcc1a52132556427170b2"
  end

  resource "osd" do
    url "https://github.com/tesseract-ocr/tessdata_fast/raw/4.0.0/osd.traineddata"
    sha256 "9cf5d576fcc47564f11265841e5ca839001e7e6f38ff7f7aacf46d15a96b00ff"
  end

  # "snum" trained data, pinned to a fixed commit of a third-party repository.
  resource "snum" do
    url "https://github.com/USCDataScience/counterfeit-electronics-tesseract/raw/319a6eeacff181dad5c02f3e7a3aff804eaadeca/Training%20Tesseract/snum.traineddata"
    sha256 "36f772980ff17c66a767f584a0d80bf2302a1afa585c01a226c1863afcea1392"
  end

  # Sample image consumed by the `test do` block only.
  resource "testfile" do
    url "https://raw.githubusercontent.com/tesseract-ocr/test/6dd816cdaf3e76153271daf773e562e24c928bf5/testing/eurotext.tif"
    sha256 "7b9bd14aba7d5e30df686fbb6f71782a97f48f81b32dc201a1b75afe6de747d6"
  end

  # Generates the build system, configures, compiles and installs Tesseract,
  # then moves the bundled trained-data files into share/tessdata.
  def install
    # The makefile looks for the leptonica headers in /usr/local/include by
    # default, which is wrong for a non-default Homebrew location, so the
    # header directory is named explicitly.
    ENV["LIBLEPT_HEADERSDIR"] = HOMEBREW_PREFIX/"include"
    ENV.cxx11

    configure_args = [
      "--prefix=#{prefix}",
      "--disable-dependency-tracking",
      "--datarootdir=#{HOMEBREW_PREFIX}/share",
    ]

    system "./autogen.sh"
    system "./configure", *configure_args
    system "make"

    # Configure was pointed at the shared prefix's share directory; override
    # datarootdir here so `make install` writes into this formula's own share
    # folder and does not hit permission errors.
    system "make", "install", "datarootdir=#{share}"

    # Move each bundled language file to share/tessdata (order: snum, eng, osd).
    %w[snum eng osd].each do |lang|
      resource(lang).stage { mv "#{lang}.traineddata", share/"tessdata" }
    end
  end

  # Message telling the user which language files are included and where to
  # get the rest.
  def caveats
    <<~EOS
      This formula contains only the "eng", "osd", and "snum" language data files.
      If you need all the other supported languages, `brew install tesseract-lang`.
    EOS
  end

  # Runs OCR on the staged sample image with the "eng" language and checks that
  # the produced text file contains the expected sentence.
  test do
    resource("testfile").stage do
      system bin/"tesseract", "./eurotext.tif", "./output", "-l", "eng"

      recognised_text = File.read("output.txt")
      assert_match "The (quick) [brown] {fox} jumps!\n", recognised_text
    end
  end
end