当前位置：首页 > news >正文

深圳公司网站建设公司免费文档网站

news 2025/12/21 19:41:50

深圳公司网站建设公司,免费文档网站,郑州做网站易云巢,光学设计软件有哪些直播语音实时转字幕： 基于Whisper的实时直播语音转录或翻译是一项使用OpenAI的Whisper模型实现的技术，它能够实时将直播中的语音内容转录成文本，甚至翻译成另一种语言。这一过程大致分为三个步骤：捕获直播音频流、语音识别…直播语音实时转字幕：基于Whisper的实时直播语音转录或翻译是一项使用OpenAI的Whisper模型实现的技术，它能够实时将直播中的语音内容转录成文本，甚至翻译成另一种语言。这一过程大致分为三个步骤：捕获直播音频流、语音识别转录以及翻译（如果需要）。下面详细解释其原理和意义。原理：捕获直播音频流：首先需要从直播源捕获音频流。这通常通过软件工具实现，如ffmpeg或streamlink，它们可以接入直播平台（如Twitch、YouTube等）的直播流并提取音频数据。语音识别转录：捕获到的音频流被送入Whisper模型进行语音识别。Whisper是OpenAI开发的一款强大的语音识别模型，它能够准确地将语音转换成文本。该模型训练于多种语言的大量数据集上，因此具有高度的准确性和多语言识别能力。翻译（可选）：如果需要将转录的文本翻译成另一种语言，可以进一步使用机器翻译模型（如OpenAI的GPT、Google Translate等）对转录文本进行翻译。意义：提高可及性：通过实时转录直播语音，听障人士和不懂直播原语言的观众也能够理解内容，大大提高了直播内容的可及性。内容归档与搜索：转录生成的文本可以作为直播内容的归档，便于未来搜索和回顾。相比视频数据，文本更容易被搜索引擎索引，从而提高内容的发现性。多语言翻译：实时翻译可以让不同语言的观众理解和享受直播内容，促进跨语言、跨文化的交流。学习和教育：对于教育直播，实时转录和翻译能够帮助学生更好地理解教学内容，尤其是对于非母语学习者。内容审核：转录文本还可以用于自动内容审核，帮助直播平台监控和管理不适宜的内容。一、部署 下载stream-translator：GitHub - fortypercnt/stream-translator 实战whisper语音识别第一天部署服务器可远程访问实时语音转文字全部代码和详细部署步骤-CSDN博客。如果在之前的文章《实战whisper语音识别第一天部署服务器》配置过环境，可跳过下面安装。 git clone https://github.com/fortypercnt/stream-translator.git pip install -r requirements.txt 模型下载： large-v3模型：https://huggingface.co/Systran/faster-whisper-large-v3/tree/main large-v2模型：https://huggingface.co/guillaumekln/faster-whisper-large-v2/tree/main large-v1模型：https://huggingface.co/guillaumekln/faster-whisper-large-v1/tree/main medium模型：https://huggingface.co/guillaumekln/faster-whisper-medium/tree/main small模型：https://huggingface.co/guillaumekln/faster-whisper-small/tree/main base模型：https://huggingface.co/guillaumekln/faster-whisper-base/tree/main tiny模型：https://huggingface.co/guillaumekln/faster-whisper-tiny/tree/main 经测试，large-v3模型需要10G显存以上。显存不够的可以用小模型。使用方法： python translator.py 直播链接 这个translator.py是进行实时翻译，不想翻译可运行下面代码。二、代码 translator1.py import argparse import sys import signal from datetime import datetimeimport ffmpeg import numpy as np import whisper from whisper.audio import SAMPLE_RATEclass RingBuffer:def __init__(self, size):self.size sizeself.data []self.full Falseself.cur 0def append(self, 
x):if self.size 0:returnif self.full:self.data[self.cur] xself.cur (self.cur 1) % self.sizeelse:self.data.append(x)if len(self.data) self.size:self.full Truedef get_all(self):all_data []for i in range(len(self.data)):idx (i self.cur) % self.sizeall_data.append(self.data[idx])return all_datadef clear(self):self.data []self.full Falseself.cur 0def open_stream(stream, direct_url, preferred_quality):if direct_url:try:process (ffmpeg.input(stream, loglevelpanic).output(pipe:, formats16le, acodecpcm_s16le, ac1, arSAMPLE_RATE).run_async(pipe_stdoutTrue))except ffmpeg.Error as e:raise RuntimeError(fFailed to load audio: {e.stderr.decode()}) from ereturn process, Noneimport streamlinkimport subprocessimport threadingstream_options streamlink.streams(stream)if not stream_options:print(No playable streams found on this URL:, stream)sys.exit(0)option Nonefor quality in [preferred_quality, audio_only, audio_mp4a, audio_opus, best]:if quality in stream_options:option qualitybreakif option is None:# Fallbackoption next(iter(stream_options.values()))def writer(streamlink_proc, ffmpeg_proc):while (not streamlink_proc.poll()) and (not ffmpeg_proc.poll()):try:chunk streamlink_proc.stdout.read(1024)ffmpeg_proc.stdin.write(chunk)except (BrokenPipeError, OSError):passcmd [streamlink, stream, option, -O]streamlink_process subprocess.Popen(cmd, stdoutsubprocess.PIPE)try:ffmpeg_process (ffmpeg.input(pipe:, loglevelpanic).output(pipe:, formats16le, acodecpcm_s16le, ac1, arSAMPLE_RATE).run_async(pipe_stdinTrue, pipe_stdoutTrue))except ffmpeg.Error as e:raise RuntimeError(fFailed to load audio: {e.stderr.decode()}) from ethread threading.Thread(targetwriter, args(streamlink_process, ffmpeg_process))thread.start()return ffmpeg_process, streamlink_processdef main(url, modellarge-v3, interval5, preferred_qualityaudio_only, direct_urlFalse, **decode_options):print(Loading model...)model whisper.load_model(model)print(Opening stream...)ffmpeg_process, _ open_stream(url, direct_url, 
preferred_quality)def handler(signum, frame):ffmpeg_process.kill()sys.exit(0)signal.signal(signal.SIGINT, handler)n_bytes interval * SAMPLE_RATE * 2 # Factor 2 comes from reading the int16 stream as bytesaudio_buffer RingBuffer(1) # No need for a history buffer since were just doing real-time transcriptiontry:while True:in_bytes ffmpeg_process.stdout.read(n_bytes)if not in_bytes:breakaudio np.frombuffer(in_bytes, np.int16).flatten().astype(np.float32) / 32768.0audio_buffer.append(audio)result model.transcribe(np.concatenate(audio_buffer.get_all()), **decode_options)print(f{datetime.now().strftime(%H:%M:%S)} {result[text]})audio_buffer.clear() # Clear the buffer after each transcriptionfinally:ffmpeg_process.kill()def cli():parser argparse.ArgumentParser(descriptionReal-time audio transcription from streams.)parser.add_argument(URL, typestr, helpStream website and channel name, e.g. twitch.tv/forsen)parser.add_argument(--model, typestr, defaultlarge-v3, helpWhisper model for transcription.)parser.add_argument(--interval, typeint, default5, helpInterval between transcription in seconds.)parser.add_argument(--preferred_quality, typestr, defaultaudio_only, helpPreferred stream quality.)parser.add_argument(--direct_url, actionstore_true, helpPass the URL directly to ffmpeg.)args parser.parse_args().__dict__url args.pop(URL)main(url, **args)if __name__ __main__:cli()python translator1.py https://www.huya.com/kpl 虎牙kpl的直播文字转录还有繁体字修改代码繁体转简体 pip install opencc-python-reimplementedtranslator2.py import argparse import sys import signal from datetime import datetimeimport ffmpeg import numpy as np import whisper from whisper.audio import SAMPLE_RATE import openccclass RingBuffer:def __init__(self, size):self.size sizeself.data []self.full Falseself.cur 0def append(self, x):if self.size 0:returnif self.full:self.data[self.cur] xself.cur (self.cur 1) % self.sizeelse:self.data.append(x)if len(self.data) self.size:self.full Truedef get_all(self):all_data []for i in 
range(len(self.data)):idx (i self.cur) % self.sizeall_data.append(self.data[idx])return all_datadef clear(self):self.data []self.full Falseself.cur 0def open_stream(stream, direct_url, preferred_quality):if direct_url:try:process (ffmpeg.input(stream, loglevelpanic).output(pipe:, formats16le, acodecpcm_s16le, ac1, arSAMPLE_RATE).run_async(pipe_stdoutTrue))except ffmpeg.Error as e:raise RuntimeError(fFailed to load audio: {e.stderr.decode()}) from ereturn process, Noneimport streamlinkimport subprocessimport threadingstream_options streamlink.streams(stream)if not stream_options:print(No playable streams found on this URL:, stream)sys.exit(0)option Nonefor quality in [preferred_quality, audio_only, audio_mp4a, audio_opus, best]:if quality in stream_options:option qualitybreakif option is None:# Fallbackoption next(iter(stream_options.values()))def writer(streamlink_proc, ffmpeg_proc):while (not streamlink_proc.poll()) and (not ffmpeg_proc.poll()):try:chunk streamlink_proc.stdout.read(1024)ffmpeg_proc.stdin.write(chunk)except (BrokenPipeError, OSError):passcmd [streamlink, stream, option, -O]streamlink_process subprocess.Popen(cmd, stdoutsubprocess.PIPE)try:ffmpeg_process (ffmpeg.input(pipe:, loglevelpanic).output(pipe:, formats16le, acodecpcm_s16le, ac1, arSAMPLE_RATE).run_async(pipe_stdinTrue, pipe_stdoutTrue))except ffmpeg.Error as e:raise RuntimeError(fFailed to load audio: {e.stderr.decode()}) from ethread threading.Thread(targetwriter, args(streamlink_process, ffmpeg_process))thread.start()return ffmpeg_process, streamlink_processdef main(url, modellarge-v3, interval5, preferred_qualityaudio_only, direct_urlFalse, **decode_options):print(Loading model...)model whisper.load_model(model)print(Opening stream...)ffmpeg_process, _ open_stream(url, direct_url, preferred_quality)converter opencc.OpenCC(t2s) # 创建繁体转简体的转换器def handler(signum, frame):ffmpeg_process.kill()sys.exit(0)signal.signal(signal.SIGINT, handler)n_bytes interval * SAMPLE_RATE * 2 # Factor 2 comes 
from reading the int16 stream as bytesaudio_buffer RingBuffer(1)try:while True:in_bytes ffmpeg_process.stdout.read(n_bytes)if not in_bytes:breakaudio np.frombuffer(in_bytes, np.int16).flatten().astype(np.float32) / 32768.0audio_buffer.append(audio)result model.transcribe(np.concatenate(audio_buffer.get_all()), **decode_options)result_text converter.convert(result[text]) # 将繁体转换为简体print(f{datetime.now().strftime(%H:%M:%S)} {result_text})audio_buffer.clear()finally:ffmpeg_process.kill()def cli():parser argparse.ArgumentParser(descriptionReal-time audio transcription from streams.)parser.add_argument(URL, typestr, helpStream website and channel name, e.g. twitch.tv/forsen)parser.add_argument(--model, typestr, defaultlarge-v3, helpWhisper model for transcription.)parser.add_argument(--interval, typeint, default5, helpInterval between transcription in seconds.)parser.add_argument(--preferred_quality, typestr, defaultaudio_only, helpPreferred stream quality.)parser.add_argument(--direct_url, actionstore_true, helpPass the URL directly to ffmpeg.)args parser.parse_args().__dict__url args.pop(URL)main(url, **args)if __name__ __main__:cli()python translator2.py https://www.huya.com/kpl

查看全文

http://www.pierceye.com/news/869679/