c927c04a1e
Signed-off-by: Adam Vandenberg <flangy@gmail.com>
43 lines
1.3 KiB
Ruby
43 lines
1.3 KiB
Ruby
require 'formula'
|
|
require 'download_strategy'
|
|
|
|
# The normal curl strategy tries to untar the download as well; this
# strategy only gunzips it.
class GzipOnlyDownloadStrategy < CurlDownloadStrategy
  # Moves the downloaded file to the URL's basename (a relative path, so it
  # lands in the current working directory) and runs gunzip -f on it.
  def stage
    archive_name = File.basename(@url)
    FileUtils.mv @tarball_path, archive_name
    safe_system '/usr/bin/gunzip', '-f', archive_name
  end
end
|
|
|
|
# Formula describing the gzipped English trained-data file; it is
# downloaded with GzipOnlyDownloadStrategy instead of the default strategy.
class TesseractEnglishData < Formula
  url 'http://tesseract-ocr.googlecode.com/files/eng.traineddata.gz', :using => GzipOnlyDownloadStrategy
  version '3.00'
  md5 'd91041ad156cf2db36664e91ef799451'
end
|
|
|
|
# Formula for the Tesseract OCR engine. Installation also pulls in the
# English trained data described by TesseractEnglishData.
class Tesseract < Formula
  url 'http://tesseract-ocr.googlecode.com/files/tesseract-3.00.tar.gz'
  homepage 'http://code.google.com/p/tesseract-ocr/'
  md5 'cc812a261088ea0c3d2da735be35d09f'

  depends_on 'libtiff'

  fails_with_llvm "Executable 'tesseract' segfaults on 10.6 when compiled with llvm-gcc", :build => "2206"

  # Runs configure and `make install` against the formula prefix, then
  # brews TesseractEnglishData and moves eng.traineddata into share/tessdata.
  def install
    configure_args = ["--disable-dependency-tracking", "--prefix=#{prefix}"]
    system "./configure", *configure_args
    system "make install"

    # The trailing slash keeps the destination a directory path.
    tessdata_dir = "#{share}/tessdata/"
    TesseractEnglishData.new.brew do
      mv "eng.traineddata", tessdata_dir
    end
  end

  # Returns a short usage note: convert the input to a grayscale TIFF,
  # then run tesseract on it.
  def caveats
    <<-EOF.undent
      Tesseract is an OCR (Optical Character Recognition) engine.

      The easiest way to use it is to convert the source to a Grayscale tiff:
      `convert source.png -type Grayscale terre_input.tif`
      then run tesseract:
      `tesseract terre_input.tif output`
    EOF
  end
end
|