#!/usr/bin/env ruby
# frozen_string_literal: true

# TakOCR: renders every non-PNG input named on the command line to PNG with
# ImageMagick's `convert`, then runs `ocroscript recognize` over all pages and
# prints whatever the recognizer writes to stdout.
#
# By Michael Moore
# Placed in the Public Domain for any use by anybody.
# A link to my site is appreciated, but by no means required
# http://bookscanned.com

require 'shellwords'
require 'tmpdir'

# Shell redirection appended to every external command.
# Make this an empty string if you want verbose output, or redirect 2>&1 if
# you want everything in the TakOCR output
QUIET = '2>/dev/null'

ENV['MAGICK_HOME'] = '/usr/local/ImageMagick-6.4.8/'
ENV['DYLD_LIBRARY_PATH'] = "#{ENV['MAGICK_HOME']}/lib"
ENV['PATH'] = "#{ENV['MAGICK_HOME']}/bin:/usr/local/bin:#{ENV['PATH']}"
ENV['TESSDATA_PREFIX'] = '/usr/local/share/'

# Extensions whose files may hold several pages; each page becomes its own PNG.
MULTIPAGE_EXTENSIONS = %w[.pdf .tiff .tif].freeze

# True when +img+ already has a .png extension (case-insensitive); such files
# are handed to the recognizer untouched.
def png?(img)
  File.extname(img).downcase == '.png'
end

# Builds the shell command that renders +img+ into PNG file(s) inside +dir+.
#
# img    - path of the source image or document
# dir    - directory receiving the PNG output
# prefix - string prepended to the output file name; keeps inputs that share
#          a basename from overwriting each other
# quiet  - shell redirection appended to the command
#
# Both paths are shell-escaped so spaces and metacharacters in file names
# survive. Multipage formats get a zero-padded page counter (%04d) so that a
# plain lexical sort of the output names equals page order.
def convert_command(img, dir, prefix, quiet = QUIET)
  multipage = MULTIPAGE_EXTENSIONS.include?(File.extname(img).downcase)
  suffix = multipage ? '%04d.png' : '.png'
  target = File.join(dir, "#{prefix}#{File.basename(img)}#{suffix}")
  source_arg = Shellwords.escape(img)
  target_arg = Shellwords.escape(target)
  "convert -density 300 -units PixelsPerInch #{source_arg} #{target_arg} #{quiet}"
end

# Returns the full paths of the PNG files in +dir+ whose names start with
# +prefix+, sorted by name.
def converted_pages(dir, prefix)
  names = Dir.entries(dir).select do |name|
    name.start_with?(prefix) && name.end_with?('.png')
  end
  names.sort.map { |name| File.join(dir, name) }
end

# Returns the PNG paths to recognize, in command-line order.
#
# PNG inputs are passed through as-is; every other input is converted into
# +dir+ first and contributes the files that conversion produced. An input
# that yields no PNG is reported on stderr and skipped.
def collect_pages(inputs, dir, quiet = QUIET)
  pages = []
  inputs.each_with_index do |img, index|
    if png?(img)
      pages << img
      next
    end

    prefix = format('%04d-', index)
    `#{convert_command(img, dir, prefix, quiet)}`
    produced = converted_pages(dir, prefix)
    warn "takocr: no PNG produced for #{img}" if produced.empty?
    pages.concat(produced)
  end
  pages
end

# Entry point: converts +argv+ as needed, runs the recognizer and prints its
# stdout. With no arguments it prints a usage line on stderr and does nothing.
def main(argv, quiet = QUIET)
  if argv.empty?
    warn "usage: #{File.basename($PROGRAM_NAME)} IMAGE_OR_DOCUMENT..."
    return
  end

  # The block form creates a private (0700) directory and removes it, with
  # the intermediate PNGs, once the recognizer has finished.
  Dir.mktmpdir('takocr') do |dir|
    pages = collect_pages(argv, dir, quiet)
    puts `ocroscript recognize #{Shellwords.join(pages)} #{quiet}`
  end
end

main(ARGV) if __FILE__ == $PROGRAM_NAME