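# Prerequisites (run in a shell, not in Python):
#   pip install pyocr Pillow PyPDF2
# pyocr only wraps an external OCR engine such as Tesseract, which must be
# installed separately together with the language data used below (chi_sim).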
# --- Imports ---
import io
import sys

import pyocr
import pyocr.builders
from PIL import Image
# --- Select an OCR backend ---
# Ask pyocr which OCR backends it can find on this machine.
tools = pyocr.get_available_tools()
if not tools:
    # Nothing to run OCR with: say why on stderr and exit with status 1.
    sys.exit("No OCR tool found")
# Use the first backend in the list returned by pyocr.
ocr_tool = tools[0]
# --- Load the PDF pages ---
import PyPDF2

pdf_file_path = "path/to/your/pdf/file.pdf"

# `pages` collects one in-memory file object per image embedded in the PDF
# (for a scanned document that is normally one image per page). File-like
# objects are used so each entry can be handed directly to PIL's Image.open.
# Extracting text with extract_text() would be pointless here: it returns a
# string, which cannot be opened as an image for OCR.
#
# NOTE(review): PdfReader, reader.pages and page.images are the PyPDF2 3.x
# API (the older PdfFileReader/numPages/getPage names were removed there) —
# confirm the installed PyPDF2 version provides them.
# NOTE(review): pages that contain only real text and no embedded images
# contribute nothing to `pages`; rendering such pages would need a PDF
# rasterizer, which this script does not depend on.
pages = []
with open(pdf_file_path, 'rb') as pdf_file:
    pdf_reader = PyPDF2.PdfReader(pdf_file)
    for page in pdf_reader.pages:
        # Read the image bytes while the PDF is still open; the BytesIO
        # copies stay valid after the file is closed.
        pages.extend(io.BytesIO(embedded.data) for embedded in page.images)
# --- Run OCR on every page and print the result ---
# Run OCR over every entry of `pages` and print the combined text.
# NOTE(review): assumes each entry of `pages` is something Image.open accepts
# (a path or a binary file-like object holding image data) — confirm against
# the code that builds `pages`.
recognized = []
for page in pages:
    # The context manager releases the image's resources once OCR is done.
    with Image.open(page) as image:
        recognized.append(
            ocr_tool.image_to_string(
                image,
                lang='chi_sim',  # Simplified Chinese language data
                builder=pyocr.builders.TextBuilder(),
            )
        )

# Join once at the end instead of growing a string inside the loop; pages are
# concatenated with no separator, exactly as before.
text = "".join(recognized)
print(text)