PDF Reader With Voice
October 29, 2022
Image Reader OCR code:
import pytesseract #pip install tesseract
import os
from PIL import Image
import pyttsx3
# http://www.pillalamarri.in/python/pdf-reader-with-voice/
engine = pyttsx3.init('sapi5')
voices = engine.getProperty('voices')
#print(voices[1].id)
engine.setProperty('voice',voices[0].id)
def speak(audio):
engine.say(audio)
engine.runAndWait()
pytesseract.pytesseract.tesseract_cmd = r"C:\\Program Files\\Tesseract-OCR\tesseract.exe" #Path to the tesseract
# http://www.pillalamarri.in/python/pdf-reader-with-voice/
img = Image.open('img2.jpg')# add Image name here with file extention
text = pytesseract.image_to_string(img)
print(text)
remember = open('remember.txt','w')
remember.write(text)
remember.close()
speak(text)
PDF Reader :
import io
from PIL import Image
import pytesseract
from wand.image import Image as wi
import pyttsx3
import speech_recognition as sr
# http://www.pillalamarri.in/python/pdf-reader-with-voice/
engine = pyttsx3.init('sapi5')
voices = engine.getProperty('voices')
#print(voices[1].id)
engine.setProperty('voice',voices[0].id)
def speak(audio):
engine.say(audio)
engine.runAndWait()
pytesseract.pytesseract.tesseract_cmd = r"C:\\Program Files\\Tesseract-OCR\tesseract.exe" #Path to the tesseract
pdf = wi(filename = "sample.pdf", resolution = 300)
pdfImage = pdf.convert('jpeg')
imageBlobs = []
for img in pdfImage.sequence:
imgPage = wi(image = img)
imageBlobs.append(imgPage.make_blob('jpeg'))
recognized_text = []
for imgBlob in imageBlobs:
im = Image.open(io.BytesIO(imgBlob))
text = pytesseract.image_to_string(im, lang = 'eng')
recognized_text.append(text)
# http://www.pillalamarri.in/python/pdf-reader-with-voice/
imageBlobs = str(text)
recognized_text = text
print(recognized_text)
speak(recognized_text)
remember = open('remember.txt','w')
remember.write(text)
remember.close()
# http://www.pillalamarri.in/python/pdf-reader-with-voice/
Posted in Python