2009-12-29 04:53:08 +00:00
|
|
|
require 'formula'
|
|
|
|
|
2011-03-10 05:11:03 +00:00
|
|
|
# Download-only formula describing the English language data archive for
# Tesseract. It defines no install step of its own; it only declares where
# the archive lives, its checksum, and its version.
class TesseractEnglishData < Formula
  url "http://tesseract-ocr.googlecode.com/files/tesseract-2.00.eng.tar.gz"
  md5 "b8291d6b3a63ce7879d688e845e341a9"
  # Version is stated explicitly rather than left to be derived from the URL.
  version "2.00"
end
|
|
|
|
|
2011-03-10 05:11:03 +00:00
|
|
|
# Formula that builds Tesseract 2.04 from source and bundles the English
# language data described by TesseractEnglishData.
class Tesseract < Formula
  url "http://tesseract-ocr.googlecode.com/files/tesseract-2.04.tar.gz"
  homepage "http://code.google.com/p/tesseract-ocr/"
  md5 "b44eba1a9f4892ac62e484c807fe0533"

  depends_on "libtiff"

  fails_with_llvm "Executable 'tesseract' segfaults on 10.6 when compiled with llvm-gcc", :build => "2206"

  # Copies the language data into the build tree, then configures and
  # installs Tesseract into the formula prefix.
  def install
    # 'make install' expects the language data files in the build directory.
    # The destination is resolved up front, before the block runs.
    # NOTE(review): presumably `brew` yields from inside the data archive's
    # own unpacked directory, which is why the path is captured first - confirm.
    tessdata_dir = "#{Dir.pwd}/tessdata/"
    TesseractEnglishData.new.brew do
      cp Dir.glob("*"), tessdata_dir
    end

    configure_args = [
      "--prefix=#{prefix}",
      "--disable-debug",
      "--disable-dependency-tracking"
    ]
    system "./configure", *configure_args
    system "make install"
  end

  # Returns a short usage note: how to convert an image to a grayscale TIFF
  # and how to run tesseract on it.
  def caveats
    <<-EOF.undent
    Tesseract is an OCR (Optical Character Recognition) engine.

    The easiest way to use it is to convert the source to a Grayscale tiff:
    `convert source.png -type Grayscale terre_input.tif`
    then run tesseract:
    `tesseract terre_input.tif output`
    EOF
  end
end
|