寻找扬声器声音的可录制接口
import sounddevice as sd
import wave
import os

# List every audio device that sounddevice reports, so that the indices used
# below can be matched to device names by eye.
devices = sd.query_devices()
print(devices)

# Try to record 10 seconds of mono, 16 kHz, 16-bit audio from every device
# index.  A device that cannot be opened as an input raises, is reported as
# unavailable, and has its (empty) WAV file removed again.
for i in range(len(devices)):
    wav_path = f'test{i}.wav'
    wf = wave.open(wav_path, 'wb')
    wf.setnchannels(1)
    wf.setsampwidth(2)
    wf.setframerate(16000)

    def callback(indata, frames, time, status):
        """Append each captured raw block to the WAV file of the current device."""
        wf.writeframes(bytes(indata))

    try:
        with sd.RawInputStream(samplerate=16000, blocksize=4096, dtype='int16',
                               channels=1, device=i, callback=callback):
            sd.sleep(10000)
    except Exception:
        # Deliberately not a bare ``except``: Ctrl+C (KeyboardInterrupt) must
        # still be able to abort the whole scan instead of being counted as
        # "device unavailable".
        recordable = False
    else:
        recordable = True
    finally:
        # Close the WAV file exactly once, whatever happened above.
        wf.close()

    if recordable:
        print(f'{i} interface available')
    else:
        print(f'{i} interface unavailable')
        os.remove(wav_path)
针对 sounddevice.query_devices() 所查询到的每个设备依次录制 10 秒音频:出现报错则说明该设备无法录制(对应的 test{i}.wav 会被删除)。
逐个聆听每个可用设备所生成的 test{i}.wav,即可找到能录到扬声器声音的目标设备。
录制屏幕
import sounddevice as sd
import wave
import os
import time
import numpy as np
from PIL import ImageGrab,Image
import cv2
from multiprocessing import Process
import subprocess
import shutil

# Frames are named after the 1/FRAME_RATE-second slot (counted from the start
# time) in which they were captured; ffmpeg later plays them back at this rate.
FRAME_RATE = 10
# Audio capture settings: mono, 16-bit samples, 16 kHz.
SAMPLE_RATE = 16000
# Index of the input device that records the speaker output on the author's
# machine; find the right one for your machine with the device-probe script.
AUDIO_DEVICE = 21


def _sleep_until(deadline):
    """Block until ``time.time()`` reaches *deadline* without spinning the CPU."""
    remaining = deadline - time.time()
    if remaining > 0:
        time.sleep(remaining)


def record_screen_motion(st, bbox=(0, 0, 1920, 1080)):
    """Capture screenshots forever and save them as numbered JPEG frames.

    Runs until the process is stopped from outside; it never returns.

    Args:
        st: Epoch timestamp (seconds) at which capturing starts.  It is also
            the origin used for numbering the frames.
        bbox: Screen region ``(left, top, right, bottom)`` to grab, in screen
            coordinates (Windows: Settings > Display > resolution).
    """
    _sleep_until(st)
    while True:
        img = ImageGrab.grab(bbox=bbox)
        ct = time.time()
        # Convert the PIL image straight to an ndarray; going through
        # ``getdata()`` builds a per-pixel Python sequence and is far slower.
        # ``convert('RGB')`` guarantees three channels for the colour swap.
        frame = cv2.cvtColor(np.array(img.convert('RGB')), cv2.COLOR_RGB2BGR)
        cv2.imwrite(f'{int((ct - st) * FRAME_RATE):05d}.jpg', frame)


def fill_missing_frames(directory='.'):
    """Fill the gaps in the numbered ``NNNNN.jpg`` sequence inside *directory*.

    Every missing index below the highest captured index is created as a copy
    of the next captured frame, so that ffmpeg sees a contiguous sequence
    starting at 0.

    Args:
        directory: Folder that holds the captured frames.

    Returns:
        The number of frames that were created by copying (0 if the folder
        contains no frames at all).
    """
    captured = sorted(
        int(name[:-len('.jpg')])
        for name in os.listdir(directory)
        if name.endswith('.jpg') and name[:-len('.jpg')].isdigit()
    )
    if not captured:
        return 0

    copied = 0
    cursor = 0  # position in ``captured`` of the next real frame at or after ``index``
    for index in range(captured[-1]):
        target = os.path.join(directory, f'{index:05d}.jpg')
        if os.path.exists(target):
            cursor += 1
        else:
            source = os.path.join(directory, f'{captured[cursor]:05d}.jpg')
            shutil.copy(source, target)
            copied += 1
    return copied


def main():
    """Record the screen and the audio device until Ctrl+C, then mux them to ``o.mp4``.

    All output is written to a new directory named after the current
    timestamp, which also becomes the working directory.
    """
    di = time.strftime('%Y%m%d%H%M%S')
    os.mkdir(di)
    os.chdir(di)

    # Screen and audio capture both start one second from now, which leaves
    # the child process enough time to come up.
    st = time.time()
    recorder = Process(target=record_screen_motion, args=(st + 1,))
    recorder.daemon = True

    wf = wave.open('test.wav', 'wb')
    try:
        wf.setnchannels(1)
        wf.setsampwidth(2)
        wf.setframerate(SAMPLE_RATE)

        def callback(indata, frames, time_info, status):
            """Append each captured raw audio block to ``test.wav``."""
            wf.writeframes(bytes(indata))

        recorder.start()
        _sleep_until(st + 1)
        with sd.RawInputStream(samplerate=SAMPLE_RATE, blocksize=1024,
                               dtype='int16', channels=1, device=AUDIO_DEVICE,
                               callback=callback):
            while True:
                sd.sleep(10000)
    except KeyboardInterrupt:
        # Ctrl+C is the normal way to stop recording; post-processing follows.
        pass
    finally:
        wf.close()
        # Make sure the screen recorder is no longer writing frames before
        # they are inspected.  It normally receives the Ctrl+C as well, so
        # give it a moment to exit on its own before terminating it.
        if recorder.is_alive():
            recorder.join(timeout=2)
        if recorder.is_alive():
            recorder.terminate()
            recorder.join()

    fill_missing_frames()
    if not os.path.exists(f'{0:05d}.jpg'):
        print('no screen frames were captured; skipping video encoding')
        return

    # Argument lists (instead of one command string) work on every platform
    # and need no shell quoting; check=True stops at the first ffmpeg failure.
    subprocess.run(
        ['ffmpeg', '-f', 'image2', '-framerate', str(FRAME_RATE),
         '-i', '%05d.jpg', '-b:v', '25313k', 'test.mp4'],
        check=True,
    )
    # Encode the audio stream as AAC when building the final video: the MP4
    # container does not support PCM (pcm_alaw, pcm_s16le), see
    # https://cloud.tencent.com/developer/ask/sof/105539438
    subprocess.run(
        ['ffmpeg', '-i', 'test.mp4', '-i', 'test.wav',
         '-vcodec', 'copy', '-acodec', 'aac', 'o.mp4'],
        check=True,
    )
    print('完成录制屏幕任务')


if __name__ == '__main__':
    main()
截屏和录音在脚本启动 1 秒后同时开始。由于笔记本性能太差,每隔 40 毫秒启动一个截屏线程(以电影帧率来取帧)的做法,与不用线程、连续截屏的效果一样(约 1fps)。因此脚本在每次截屏后计算它与第 0 帧(起始时刻)之间的时间差,按 10fps 为帧编号,并复制已有帧来补齐缺失的中间帧,最后把图片序列合成为视频。如果电脑硬件条件好、截屏速率大于 10fps,可以考虑调整相关参数(帧编号所用的倍率和 ffmpeg 的 -framerate),使画面更加流畅。
参考链接:
massorant Vosk_real-time_stt
tellw/speech_input
python 三种方式实现截屏(详解+完整代码)
创建于2412222215,修改于2412222215