當前位置：首頁 > 编程语言 > python > 内容正文

python

python androidhelper 语音识字_Python实现截图AI文字识字小工具

發布時間：2023/12/31 python 27 豆豆

生活随笔收集整理的這篇文章主要介紹了 python androidhelper 语音识字_Python实现截图AI文字识字小工具，小編覺得挺不錯的，現在分享給大家，幫大家做個參考。

1. 獲取百度識字API

由于百度提供了免費的圖片識字API，而且非常好用，所以直接使用百度的API來進行識字。

首先在百度AI開放平臺注冊一個賬號，網址 http://ai.baidu.com/ ，注冊完之后進入右上角控制臺，點擊 -> "文字識別"

點擊“創建應用”

創建一個識字的應用，接口選擇“文字識別”，然后下面填“個人”，描述隨便寫一些。

然后在應用主頁中可以看到申請到的API Key和Secret Key，該接口每天可以調用50000次，完全可以滿足我們個人的需求。

2. 調用百度API

拿到API_KEY和SECRET_KEY后獲取token，然后通過token和接口來對圖片進行識字，這里使用本地tmp.png作為目標圖片進行識別。代碼如下，想用的直接復制就好。

baidu.py

"""

baidu.py

"""

import requests

import base64

API_KEY = '你的API_KEY'

SECRET_KEY = '你的SECRET_KEY'

def get_access_token():
    """Request an OAuth access token from the Baidu AI platform.

    API_KEY is sent as ``client_id`` and SECRET_KEY as ``client_secret``,
    using the ``client_credentials`` grant type.

    Returns:
        The value of the ``access_token`` field of the JSON response.

    Raises:
        KeyError: if the JSON response contains no ``access_token`` field;
            the message includes the response body to aid diagnosis.
    """
    host = 'https://aip.baidubce.com/oauth/2.0/token'
    query = {
        'grant_type': 'client_credentials',
        'client_id': API_KEY,
        'client_secret': SECRET_KEY,
    }
    # Let requests build the query string so the credentials are URL-encoded,
    # and bound the wait so a dead connection cannot hang the caller forever.
    response = requests.get(host, params=query, timeout=10)
    payload = response.json()
    try:
        return payload['access_token']
    except KeyError:
        raise KeyError(
            'access_token missing from token response: %r' % (payload,)
        ) from None

# Fetched once when this module is loaded; img2word() reads this module-level value.
access_token = get_access_token()

def img2word():
    """Send ./tmp.png to the Baidu ``general_basic`` OCR endpoint.

    The image is read in binary mode, base64-encoded and posted as the
    ``image`` form field; the module-level ``access_token`` is appended to
    the URL. The decoded JSON response is printed before being processed.

    Returns:
        A list with the ``words`` value of every entry in the response's
        ``words_result`` list.

    Raises:
        KeyError: if the response has no ``words_result`` field.
    """
    request_url = "https://aip.baidubce.com/rest/2.0/ocr/v1/general_basic"
    # Read the image inside a context manager so the handle is always closed.
    with open('./tmp.png', 'rb') as f:
        img = base64.b64encode(f.read())
    params = {"image": img}
    request_url = request_url + "?access_token=" + access_token
    headers = {'content-type': 'application/x-www-form-urlencoded'}
    response = requests.post(request_url, data=params, headers=headers)
    # Decode the body once and reuse it for both the log line and the result.
    result = response.json()
    print(result)
    return [x['words'] for x in result['words_result']]

3. 實現截圖功能

識字部分已經完成，接下來實現截圖功能。我們想要像微信里面截圖那樣可以選中區域進行截圖，代碼如下：

scrshoot.py

"""

scrshoot.py

"""

from PIL import ImageGrab

import tkinter

import ctypes

class CTkPrScrn:
    """Full-screen, nearly transparent overlay for grabbing a screen region.

    Constructing an instance opens the overlay and blocks in the Tk main
    loop. Dragging with the left mouse button draws a red rectangle; on
    release the selected region is grabbed with ``ImageGrab`` and saved as
    ``tmp.png``, then the overlay is closed. Pressing Esc destroys the
    overlay without saving.

    Uses ``ctypes.windll``, so it only runs on Windows.
    """

    def __init__(self):
        self.__start_x, self.__start_y = 0, 0
        self.__scale = 1
        self.__win = tkinter.Tk()
        self.__win.attributes("-alpha", 0.1)  # nearly transparent window
        self.__win.attributes("-fullscreen", True)  # cover the whole screen
        self.__win.attributes("-topmost", True)  # stay above other windows
        self.__width = self.__win.winfo_screenwidth()
        self.__height = self.__win.winfo_screenheight()
        # Canvas on which the selection rectangle is drawn.
        self.__canvas = tkinter.Canvas(self.__win, width=self.__width,
                                       height=self.__height, bg="white")
        # The event names were lost from the published listing (empty
        # strings); they are restored here to match the handlers' purpose.
        self.__win.bind('<Button-1>', self.xFunc1)  # left button pressed
        self.__win.bind('<ButtonRelease-1>', self.xFunc1)  # left button released
        self.__win.bind('<B1-Motion>', self.xFunc2)  # drag with left button held
        self.__win.bind('<Escape>', lambda e: self.__win.destroy())  # Esc quits

        user32 = ctypes.windll.user32
        gdi32 = ctypes.windll.gdi32
        dc = user32.GetDC(None)
        try:
            widthScale = gdi32.GetDeviceCaps(dc, 8)  # width after display scaling
            heightScale = gdi32.GetDeviceCaps(dc, 10)  # height after display scaling
            width = gdi32.GetDeviceCaps(dc, 118)  # native width
            height = gdi32.GetDeviceCaps(dc, 117)  # native height
        finally:
            # Every GetDC must be paired with ReleaseDC.
            user32.ReleaseDC(None, dc)
        # Factor that converts Tk event coordinates to physical pixels.
        self.__scale = width / widthScale
        print(self.__width, self.__height, widthScale, heightScale, width, height, self.__scale)

        self.__win.mainloop()  # block until the overlay is closed

    def xFunc1(self, event):
        """Handle left-button press and release.

        On press the start corner is recorded. On release the selected
        region is saved to ``tmp.png`` and the overlay is closed; a
        selection with zero width or height is ignored.
        """
        # Dispatch on the event type rather than on event.state, whose value
        # also includes unrelated modifier bits.
        if event.type == tkinter.EventType.ButtonPress:
            self.__start_x, self.__start_y = event.x, event.y
        elif event.type == tkinter.EventType.ButtonRelease:
            if event.x == self.__start_x or event.y == self.__start_y:
                return
            # Normalise the corners so dragging up or to the left also works.
            left, right = sorted((self.__start_x, event.x))
            top, bottom = sorted((self.__start_y, event.y))
            im = ImageGrab.grab((round(self.__scale * left), round(self.__scale * top),
                                 round(self.__scale * right), round(self.__scale * bottom)))
            imgName = 'tmp.png'
            im.save(imgName)
            print('保存成功')
            self.__win.update()
            self.__win.destroy()
            self.__win.quit()

    def xFunc2(self, event):
        """Redraw the red selection rectangle while the mouse is dragged."""
        if event.x == self.__start_x or event.y == self.__start_y:
            return
        # Replace the previous rectangle (tagged "prscrn") with the new one.
        self.__canvas.delete("prscrn")
        self.__canvas.create_rectangle(self.__start_x, self.__start_y, event.x, event.y,
                                       outline='red', tag="prscrn")
        # Make sure the canvas is laid out in the window.
        self.__canvas.pack()

if __name__ == '__main__':
    # Open the region-selection overlay when this file is run as a script.
    prScrn = CTkPrScrn()

4. tkinter實現簡單UI

通過tk庫簡單實現一個窗口用于截圖和顯示文本，同時可以復制文本。

front.py

"""

front.py

"""

import tkinter as tk

from baidu import img2word

from scrshoot import CTkPrScrn

def center_window(root, width, height):
    """Size ``root`` to ``width`` x ``height`` and centre it on the screen.

    Args:
        root: object providing ``winfo_screenwidth()``,
            ``winfo_screenheight()`` and ``geometry()``.
        width: desired window width.
        height: desired window height.
    """
    screenwidth = root.winfo_screenwidth()
    screenheight = root.winfo_screenheight()
    # '%d' truncates the halved offsets to whole numbers.
    size = '%dx%d+%d+%d' % (width, height,
                            (screenwidth - width) / 2,
                            (screenheight - height) / 2)
    root.geometry(size)

def srcandserach():
    """Take a screenshot, run OCR on it and show the text in the window.

    Opens the CTkPrScrn overlay, joins the strings returned by img2word()
    with single spaces, prints the result, and replaces the contents of
    the module-level ``text_words`` widget with it.
    """
    CTkPrScrn()
    words = ' '.join(img2word())
    print(words)
    # '1.0' (line 1, character 0) is the first position in a Text widget.
    text_words.delete('1.0', tk.END)
    text_words.insert(tk.INSERT, words)
    root.update()

# Font (family, size) shared by the button and the text box.
ft = ('Arial', '12')

# Main window, centred on the screen.
root = tk.Tk()
root.title('識字搜索')
center_window(root, 800, 300)

# Row 0: button that runs srcandserach when clicked.
button_scr = tk.Button(root, text="截圖", font=ft, command=srcandserach)
button_scr.grid(row=0, column=0)

# Row 1: text box that srcandserach fills with the recognised words.
text_words = tk.Text(root, width=80, height=10, font=ft)
text_words.grid(row=1, column=0)

root.mainloop()

5. 工具展示

識字ui

點擊截圖識字，可以識別出截圖的文字信息，大功告成啦~

原文鏈接:https://blog.csdn.net/blowfire123/article/details/112079583

總結

以上是生活随笔為你收集整理的python androidhelper 语音识字_Python实现截图AI文字识字小工具的全部內容，希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯，歡迎將生活随笔推薦給好友。

上一篇： X86 bios介绍
下一篇：机器视觉python推荐书籍_智能硬件与