"""Print 20-digit numbers and OCR'd image text for every page of a PDF.

For each page of the input PDF this script:
  1. extracts the page text and prints every run of 20 digits found in it;
  2. prints a page header;
  3. renders each embedded image to JPEG bytes, pads it with a white border
     and prints the text recognised by Tesseract (Simplified Chinese model).
"""
import io
import re

import fitz
import pytesseract
from PIL import Image, ImageEnhance, ImageOps
from pyzbar import pyzbar

f = '71229.pdf'

# Compiled once: matches any run of exactly 20 consecutive digits.
pat = re.compile(r'\d{20}')

# The context manager closes the document even if OCR raises midway.
with fitz.open(f) as pdf:
    for n, page in enumerate(pdf, start=1):
        # Extract plain text; passing 'html' (page.get_text('html')) would
        # return the page content as HTML instead.
        text = page.get_text()
        # List of embedded images; every entry is a tuple.
        pic = page.get_images()

        result = pat.findall(text)
        print(result)
        print("第" + str(n) + "页:")

        for p in pic:
            xref = p[0]  # first tuple item is the image's xref number
            pix = fitz.Pixmap(pdf, xref)
            # NOTE(review): JPEG output may fail for pixmaps with an alpha
            # channel -- confirm against the PyMuPDF Pixmap docs if such
            # images can occur in the input.
            imgdata = pix.tobytes("JPG")  # encode as JPEG bytes
            img = Image.open(io.BytesIO(imgdata))  # load into PIL
            # Pad with a white border before recognition.
            img = ImageOps.expand(img, border=50, fill="white")
            # Recognise the text in the image.
            text1 = pytesseract.image_to_string(img, lang='chi_sim')
            print(text1)