# Homebrew formula for OCRmyPDF: installs the tool and its Python
# dependencies into a private Python 3 virtualenv under libexec.
class Ocrmypdf < Formula
  include Language::Python::Virtualenv

  desc "Adds an OCR text layer to scanned PDF files"
  homepage "https://github.com/jbarlow83/OCRmyPDF"
  url "https://files.pythonhosted.org/packages/c2/69/d250b73cab334a07608715db2f41bbf3d1e69e3f097042baec5cbfa7a9f7/ocrmypdf-5.5.tar.gz"
  sha256 "8f2ba79c698490373c8fa4ac669c0f6e86a3f146edff6342db91428376ad2269"

  depends_on "pkg-config" => :build
  depends_on "freetype"
  depends_on "ghostscript"
  depends_on "jpeg"
  depends_on "libpng"
  depends_on "python3"
  depends_on "qpdf"
  depends_on "tesseract"
  depends_on "unpaper"

  resource "cffi" do
    url "https://files.pythonhosted.org/packages/c9/70/89b68b6600d479034276fed316e14b9107d50a62f5627da37fafe083fde3/cffi-1.11.2.tar.gz"
    sha256 "ab87dd91c0c4073758d07334c1e5f712ce8fe48f007b86f8238773963ee700a6"
  end

  resource "img2pdf" do
    url "https://files.pythonhosted.org/packages/7e/a2/4f06081f674920be757d894b4bab874e6a3b5227e730cb7618430b366e69/img2pdf-0.2.4.tar.gz"
    sha256 "140b70fa3a3bfb54e92947818cee01483a4f1492b5d1d02b0f649257f5ffc9ae"
  end

  resource "olefile" do
    url "https://files.pythonhosted.org/packages/35/17/c15d41d5a8f8b98cc3df25eb00c5cee76193114c78e5674df6ef4ac92647/olefile-0.44.zip"
    sha256 "61f2ca0cd0aa77279eb943c07f607438edf374096b66332fae1ee64a6f0f73ad"
  end

  resource "Pillow" do
    url "https://files.pythonhosted.org/packages/0f/57/25be1a4c2d487942c3ed360f6eee7f41c5b9196a09ca71c54d1a33c968d9/Pillow-5.0.0.tar.gz"
    sha256 "12f29d6c23424f704c66b5b68c02fe0b571504459605cfe36ab8158359b0e1bb"
  end

  resource "pycparser" do
    url "https://files.pythonhosted.org/packages/8c/2d/aad7f16146f4197a11f8e91fb81df177adcc2073d36a17b1491fd09df6ed/pycparser-2.18.tar.gz"
    sha256 "99a8ca03e29851d96616ad0404b4aad7d9ee16f25c9f9708a11faf2810f7b226"
  end

  resource "PyPDF2" do
    url "https://files.pythonhosted.org/packages/b4/01/68fcc0d43daf4c6bdbc6b33cc3f77bda531c86b174cac56ef0ffdb96faab/PyPDF2-1.26.0.tar.gz"
    sha256 "e28f902f2f0a1603ea95ebe21dff311ef09be3d0f0ef29a3e44a932729564385"
  end

  resource "reportlab" do
    url "https://files.pythonhosted.org/packages/87/f9/53b34c58d3735a6df7d5c542bf4de60d699cfa6035e113ca08b3ecdcca3f/reportlab-3.4.0.tar.gz"
    sha256 "5beaf35e59dfd5ebd814fdefd76908292e818c982bd7332b5d347dfd2f01c343"
  end

  resource "ruffus" do
    url "https://files.pythonhosted.org/packages/97/fe/12445c6793350ab5dbf76cb87a122b9e9aab9a9040a2801004806d985216/ruffus-2.6.3.tar.gz"
    sha256 "d78728d802013d91d15e5e939554dabce196967734850fa44634dce47e3e5061"
  end

  # Builds the virtualenv in libexec, installs a patched Pillow first, then
  # every other resource, and finally OCRmyPDF itself (linked into bin).
  def install
    venv = virtualenv_create(libexec, "python3")

    # Pillow's setup.py is rewritten in place before building so that its
    # library roots point at the locations chosen below.
    resource("Pillow").stage do
      inreplace "setup.py" do |s|
        # Without the Command Line Tools the system zlib paths are taken
        # from inside the SDK; with them the prefix is empty (plain /usr).
        sdkprefix = MacOS::CLT.installed? ? "" : MacOS.sdk_path
        # Swap the header name for one that will not exist.
        # NOTE(review): presumably this keeps setup.py from detecting
        # OpenJPEG and building JPEG 2000 support - confirm.
        s.gsub! "openjpeg.h", "probably_not_a_header_called_this_eh.h"
        s.gsub! "ZLIB_ROOT = None", "ZLIB_ROOT = ('#{sdkprefix}/usr/lib', '#{sdkprefix}/usr/include')"
        s.gsub! "JPEG_ROOT = None", "JPEG_ROOT = ('#{Formula["jpeg"].opt_prefix}/lib', '#{Formula["jpeg"].opt_prefix}/include')"
        s.gsub! "FREETYPE_ROOT = None", "FREETYPE_ROOT = ('#{Formula["freetype"].opt_prefix}/lib', '#{Formula["freetype"].opt_prefix}/include')"
      end

      # avoid triggering "helpful" distutils code that doesn't recognize Xcode 7 .tbd stubs
      ENV.append "CFLAGS", "-I#{MacOS.sdk_path}/System/Library/Frameworks/Tk.framework/Versions/8.5/Headers" unless MacOS::CLT.installed?
      venv.pip_install Pathname.pwd
    end

    # Pillow was installed above from its patched tree; skip it here so it
    # is not installed a second time from the unpatched resource.
    res = resources.map(&:name).to_set - ["Pillow"]

    res.each do |r|
      venv.pip_install resource(r)
    end

    venv.pip_install_and_link buildpath
  end

  # Runs OCRmyPDF on Homebrew's fixture PDF and checks the output exists.
  test do
    # Since we use Python 3, we require a UTF-8 locale
    ENV["LC_ALL"] = "en_US.UTF-8"

    system "#{bin}/ocrmypdf", "-f", "-q", "--deskew",
                              test_fixtures("test.pdf"), "ocr.pdf"
    # assert_predicate sends :exist? to its first argument, so it must be a
    # Pathname; a bare String has no #exist? and would raise NoMethodError.
    assert_predicate testpath/"ocr.pdf", :exist?
  end
end