新增解析的js

This commit is contained in:
2025-07-16 18:34:49 +08:00
parent 122208d9cb
commit 596760a707
9 changed files with 2153 additions and 15 deletions

View File

@ -20,8 +20,8 @@
<%= htmlWebpackPlugin.options.title %>
</title>
<!-- 地图 -->
<script type="text/javascript" src="./pgis/eliMapboxgl.min.js"></script>
<!-- 地图 -->
<script type="text/javascript" src="./pgis/eliMapboxgl.min.js"></script>
<!-- 视频 -->
<script src="./static/js/vconsole.min.js"></script>
@ -31,26 +31,33 @@
<script src="./static/js/antd.min.js"></script>
<script src="./static/js/antd-with-locales.min.js"></script>
<!-- 文本提取 -->
<script src="./textPdf/pdf.min.js"></script>
<!-- 引入mammoth.js处理Word文件 -->
<script src="./textPdf/mammoth.browser.min.js"></script>
</head>
<body>
<noscript>
<strong>We're sorry but <%= htmlWebpackPlugin.options.title %> doesn't work
properly without JavaScript enabled. Please enable it to
continue.</strong>
</noscript>
<!-- 语音- 视频 提取 -->
<script src="./textVideo/recorder-core.js" charset="UTF-8"></script>
<script src="./textVideo/wav.js" charset="UTF-8"></script>
<script src="./textVideo/pcm.js" charset="UTF-8"></script>
<noscript><strong></strong> </noscript>
<div id="app"></div>
<script>
document.documentElement.addEventListener(
"touchmove",
var videoText = ''
// 初始化PDF.js
pdfjsLib.GlobalWorkerOptions.workerSrc = "./textPdf/pdf.worker.min.js";
document.documentElement.addEventListener("touchmove",
function (event) {
if (event.touches.length > 1) {
event.preventDefault();
}
},
false
);
if (event.touches.length > 1) event.preventDefault();
}, false);
</script>
<script src="./textVideo/wsconnecter.js" charset="utf-8"></script>
<script src="./textVideo/textVideo.js" charset="utf-8"></script>
</body>
</html>

18
public/textPdf/mammoth.browser.min.js vendored Normal file

File diff suppressed because one or more lines are too long

22
public/textPdf/pdf.min.js vendored Normal file

File diff suppressed because one or more lines are too long

22
public/textPdf/pdf.worker.min.js vendored Normal file

File diff suppressed because one or more lines are too long

96
public/textVideo/pcm.js Normal file
View File

@ -0,0 +1,96 @@
/*
pcm编码器+编码引擎
https://github.com/xiangyuecn/Recorder
编码原理：本编码器输出的pcm格式数据，其实就是Recorder中的buffers原始数据（经过了重新采样），16位时为LE小端模式（Little Endian），并未经过任何编码处理。
编码的代码和wav.js区别不大，pcm加上一个44字节wav头即成wav文件；所以要播放pcm就很简单了，直接转成wav文件来播放，已提供转换函数 Recorder.pcm2wav。
*/
(function(){
"use strict";
/** Descriptor for the "pcm" encoder type. */
Recorder.prototype.enc_pcm = {
  stable: true,
  testmsg: "pcm为未封装的原始音频数据pcm数据文件无法直接播放支持位数8位、16位填在比特率里面采样率取值无限制"
};

/**
 * Encodes samples as headerless PCM and passes the result to `True` as a Blob
 * of type "audio/pcm".
 *
 * @param {ArrayLike<number>} res - Input samples (treated as 16-bit values).
 * @param {function(Blob)} True - Success callback.
 * @param {function(string)} False - Failure callback (never invoked here).
 */
Recorder.prototype.pcm = function (res, True, False) {
  var sampleCount = res.length;
  // Anything other than an explicit 8 is encoded as 16-bit.
  var bits = this.set.bitRate == 8 ? 8 : 16;
  var view = new DataView(new ArrayBuffer(sampleCount * (bits / 8)));
  var idx;

  if (bits == 8) {
    for (idx = 0; idx < sampleCount; idx++) {
      // 16 -> 8 bit: keep the high byte and shift it into the unsigned range.
      view.setInt8(idx, (res[idx] >> 8) + 128);
    }
  } else {
    for (idx = 0; idx < sampleCount; idx++) {
      // 16-bit samples are written little-endian.
      view.setInt16(idx * 2, res[idx], true);
    }
  }

  True(new Blob([view.buffer], { type: "audio/pcm" }));
};
/**
 * Converts raw PCM into a playable WAV blob. Requires wav.js to be loaded.
 *
 * data: {
 *   sampleRate: 16000   sample rate of the pcm
 *   bitRate: 16         bit depth of the pcm, 8 or 16
 *   blob: Blob          the pcm bytes
 * }
 * A bare Blob may be passed instead of the object (testing only); it is then
 * treated as 16-bit / 16 kHz and a warning is logged.
 *
 * True(wavBlob, duration)  success callback
 * False(msg)               failure callback
 */
Recorder.pcm2wav = function (data, True, False) {
  if (data.slice && data.type != null) { // a Blob was passed directly (testing)
    data = { blob: data };
  }
  var sampleRate = data.sampleRate || 16000, bitRate = data.bitRate || 16;
  if (!data.sampleRate || !data.bitRate) {
    console.warn("pcm2wav必须提供sampleRate和bitRate");
  }
  if (!Recorder.prototype.wav) {
    False("pcm2wav必须先加载wav编码器wav.js");
    return;
  }

  var reader = new FileReader();
  reader.onloadend = function () {
    // onloadend also fires after a failed read; report it instead of
    // silently encoding an empty recording.
    if (reader.error || !reader.result) {
      False("pcm2wav读取pcm数据失败");
      return;
    }
    var raw = reader.result;
    var pcm;
    if (bitRate == 8) {
      // Expand unsigned 8-bit samples to signed 16-bit.
      var u8arr = new Uint8Array(raw);
      pcm = new Int16Array(u8arr.length);
      for (var j = 0; j < u8arr.length; j++) {
        pcm[j] = (u8arr[j] - 128) << 8;
      }
    } else {
      // Passing an explicit length drops a trailing odd byte; without it the
      // Int16Array constructor throws a RangeError on odd-sized buffers and
      // neither callback would ever run.
      pcm = new Int16Array(raw, 0, raw.byteLength >> 1);
    }

    Recorder({
      type: "wav",
      sampleRate: sampleRate,
      bitRate: bitRate
    }).mock(pcm, sampleRate).stop(function (wavBlob, duration) {
      True(wavBlob, duration);
    }, False);
  };
  reader.readAsArrayBuffer(data.blob);
};
})();

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,275 @@
/**
* Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights
* Reserved. MIT License (https://opensource.org/licenses/MIT)
*/
/* 2022-2023 by zhaoming,mali aihealthx.com */
// Connection: create the WebSocket wrapper and register this file's message
// and connection-state callbacks with it.
var wsconnecter = new WebSocketConnectMethod({ msgHandle: getJsonMessage, stateHandle: getConnState });
var audioBlob;
var isfilemode = true; // if it is in file mode
// Recording: create the Recorder instance. Note that it is configured for
// type "pcm" (16-bit, sampleRate 16000); recProcess receives each audio frame.
var rec = Recorder({
type: "pcm",
bitRate: 16,
sampleRate: 16000,
onProcess: recProcess
});
// Audio waiting to be sent; start_file_send, stop and recProcess all reassign it.
var sampleBuf = new Int16Array();
var rec_text = ""; // for online rec asr result
var offline_text = ""; // for offline rec asr result
var file_ext = "";
var file_sample_rate = 16000; //for wav file sample rate
var file_data_array; // array to save file data
var totalsend = 0;
// Runs once, as soon as this script is loaded.
addresschange();
/**
 * Opens the ASR server address in a new browser window/tab.
 *
 * Every "wss" occurrence in the address is rewritten to "https" before
 * opening; the current hard-coded "ws://" address contains none, so it is
 * opened unchanged.
 * NOTE(review): presumably meant to let the user accept a self-signed
 * certificate for a wss endpoint - confirm it is still wanted for plain ws.
 */
function addresschange() {
  const target = 'ws://192.168.0.232:10095'.replace(/wss/g, "https");
  window.open(target, '_blank');
}
/**
 * Parses the header of a WAV file and locates its "data" chunk.
 *
 * Only uncompressed PCM (format tag 1) with one or two channels is accepted.
 *
 * @param {Uint8Array} bytes - File content. Expected to be a Uint8Array
 *   (indexing and `subarray` are used on it).
 * @returns {?{sampleRate:number, bitRate:number, numChannels:number,
 *   wavHead44:Uint8Array, dataPos:number}} Header details, or null when the
 *   input is shorter than 44 bytes, is not a supported WAV, or has no data
 *   chunk. `wavHead44` is the RIFF header + fmt chunk + data chunk header
 *   (44 bytes when the fmt chunk body is 16 bytes); `dataPos` is the offset of
 *   the first sample byte.
 */
var readWavInfo = function (bytes) {
  if (bytes.byteLength < 44) {
    return null;
  }
  var wavView = bytes;

  // True when the ASCII string s is stored at offset p.
  var eq = function (p, s) {
    for (var k = 0; k < s.length; k++) {
      if (wavView[p + k] != s.charCodeAt(k)) {
        return false;
      }
    }
    return true;
  };

  // Little-endian unsigned 32-bit read. `>>> 0` matters: a plain `<< 24`
  // yields a negative number when the top bit is set, and a negative chunk
  // size (e.g. 0xFFFFFFF8 == -8) used to stop the scan from advancing,
  // looping forever on corrupt files.
  var readU32 = function (p) {
    return (wavView[p] | (wavView[p + 1] << 8) | (wavView[p + 2] << 16) | (wavView[p + 3] << 24)) >>> 0;
  };

  if (!eq(0, "RIFF") || !eq(8, "WAVEfmt ")) {
    return null;
  }
  var numCh = wavView[22];
  if (wavView[20] != 1 || (numCh != 1 && numCh != 2)) { // raw pcm, mono or stereo only
    return null;
  }

  var sampleRate = readU32(24);
  var bitRate = wavView[34] + (wavView[35] << 8);
  var heads = [wavView.subarray(0, 12)], headSize = 12; // keep only the required chunks

  // Walk the chunk list looking for "data"; it sits at 36 in a minimal file
  // but other chunks may precede it.
  var dataPos = 0;
  for (var i = 12, iL = wavView.length - 8; i < iL;) {
    if (eq(i, "data")) {
      heads.push(wavView.subarray(i, i + 8));
      headSize += 8;
      dataPos = i + 8;
      break;
    }
    var chunkStart = i;
    i += 8 + readU32(i + 4); // chunk id + size field + body
    if (chunkStart == 12) { // the fmt chunk is kept, everything else is skipped
      heads.push(wavView.subarray(chunkStart, i));
      headSize += i - chunkStart;
    }
  }
  if (!dataPos) {
    return null;
  }

  var wavHead = new Uint8Array(headSize);
  for (var h = 0, n = 0; h < heads.length; h++) {
    wavHead.set(heads[h], n);
    n += heads[h].length;
  }
  return {
    sampleRate: sampleRate
    , bitRate: bitRate
    , numChannels: numCh
    , wavHead44: wavHead
    , dataPos: dataPos
  };
};
/**
 * Loads the selected audio file into the module-level state used for sending.
 *
 * Sets `file_ext` immediately from the file name. Once the read completes it
 * stores the bytes in `file_data_array`, and for ".wav" files it also updates
 * `file_sample_rate` from the WAV header.
 *
 * @param {File} files - The single file chosen by the user (despite the plural
 *   name, one file is wrapped into `this.files`).
 */
function upfileOnchange(files) {
  this.files = [files];
  var selected = this.files;
  for (let i = 0; i < selected.length; i++) {
    const file = selected[i];
    const ext = file.name.split('.').pop().toLowerCase();
    file_ext = ext;

    // One read serves both purposes; the file used to be read twice.
    const reader = new FileReader();
    reader.onload = function () {
      file_data_array = reader.result;
      if (ext == "wav") {
        // readWavInfo returns null for a file that is not a supported WAV;
        // keep the previous sample rate instead of throwing in the handler.
        var info = readWavInfo(new Uint8Array(reader.result));
        if (info) {
          file_sample_rate = info.sampleRate;
        } else {
          console.log('error: unsupported or corrupt wav header');
        }
      }
    };
    reader.onerror = function (e) {
      console.log('error' + e);
    };
    reader.readAsArrayBuffer(file);
  }
}
/**
 * Makes the loaded file playable by pointing the #audio_record element at a
 * blob URL built from `file_data_array` and enabling its controls.
 *
 * Does nothing (apart from logging) when the page has no #audio_record
 * element: getJsonMessage calls this right before closing the socket, so an
 * exception here would leave the connection open.
 */
function play_file() {
  var audio_record = document.getElementById('audio_record');
  if (!audio_record) {
    console.log("play_file: #audio_record element not found");
    return;
  }
  var audioblob = new Blob([new Uint8Array(file_data_array)], { type: "audio/wav" });
  audio_record.src = (window.URL || webkitURL).createObjectURL(audioblob);
  audio_record.controls = true;
}
/**
 * Streams the loaded file (`file_data_array`) to the server in 960-byte
 * chunks, then calls stop(), which sends whatever is left in `sampleBuf`
 * followed by the end-of-speech message.
 *
 * `totalsend` is increased by the number of bytes actually handed to wsSend.
 */
function start_file_send() {
  var bytes = new Uint8Array(file_data_array);
  var chunk_size = 960; // for asr chunk_size [5, 10, 5]
  var offset = 0;
  // Walk the buffer with an offset: re-slicing the remainder on every
  // iteration copied the whole tail each time (quadratic for large files).
  while (bytes.length - offset >= chunk_size) {
    var chunk = bytes.subarray(offset, offset + chunk_size);
    offset += chunk_size;
    totalsend = totalsend + chunk.length;
    wsconnecter.wsSend(chunk);
  }
  // Leave the tail (shorter than one chunk) for stop() to flush.
  sampleBuf = bytes.subarray(offset);
  stop();
}
/**
 * Ends the current recognition request.
 *
 * Flushes any audio still held in `sampleBuf`, sends the JSON message with
 * "is_speaking": false, and clears `isRec`. When not in file mode it also
 * closes the socket after 3 seconds and stops the recorder, converting the
 * recording to WAV and attaching it to the #audio_record element.
 */
function stop() {
  var request = {
    "chunk_size": [5, 10, 5],
    "wav_name": "h5",
    "is_speaking": false,
    "chunk_interval": 10,
    "mode": getAsrMode(),
  };
  if (sampleBuf.length > 0) {
    wsconnecter.wsSend(sampleBuf);
    sampleBuf = new Int16Array();
  }
  wsconnecter.wsSend(JSON.stringify(request));

  // Update recording state.
  isRec = false;

  if (isfilemode == false) {
    // wait 3s for asr result
    setTimeout(function () {
      wsconnecter.wsStop();
    }, 3000);

    rec.stop(function (blob, duration) {
      // The options are passed as a plain argument; the previous
      // `data = {...}` form leaked an implicit global named `data`.
      Recorder.pcm2wav({ sampleRate: 16000, bitRate: 16, blob: blob },
        function (theblob, duration) {
          console.log(theblob);
          var audio_record = document.getElementById('audio_record');
          audio_record.src = (window.URL || webkitURL).createObjectURL(theblob);
          audio_record.controls = true;
        }, function (msg) {
          console.log(msg);
        }
      );
    }, function (errMsg) {
      console.log("errMsg: " + errMsg);
    });
  }
}
/**
 * Recognition mode sent to the server in the "mode" field of each request.
 * @returns {string} Always 'offline'.
 */
function getAsrMode() {
  const mode = 'offline';
  return mode;
}
/**
 * Hotwords to attach to the opening request.
 * @returns {null} Always null, i.e. no hotwords are configured.
 */
function getHotwords() {
  const hotwords = null;
  return hotwords;
}
/**
 * Splits recognised text into segments and prefixes each with its start time.
 *
 * @param {string} tmptext - Recognised text.
 * @param {?string} tmptime - JSON string holding an array of timestamp
 *   entries; element [0] of an entry is divided by 1000 for display.
 *   NOTE(review): presumably [start, end] in milliseconds - confirm with the
 *   server protocol.
 * @returns {string} One "<seconds>:<segment>\n" line per segment, or the text
 *   unchanged when there is no timestamp or the text is empty.
 */
function handleWithTimestamp(tmptext, tmptime) {
  if (tmptime == null || tmptime == "undefined" || tmptext.length <= 0) {
    return tmptext;
  }
  // Sentence punctuation (CJK full stop, full-width ? and ',', enumeration
  // comma, ASCII '?', '.', space) becomes a separator. The old pattern had two
  // empty alternatives, which match at every position and split the text into
  // single characters. NOTE(review): the two restored full-width characters
  // follow the upstream FunASR demo - confirm.
  var normalized = tmptext.replace(/[\u3002\uFF1F\uFF0C\u3001?. ]/g, ","); // in case there are a lot of "。"
  var words = normalized.split(","); // split to chinese sentence or english words
  var jsontime = JSON.parse(tmptime);
  var char_index = 0; // index for timestamp
  var text_withtime = "";
  for (var i = 0; i < words.length; i++) {
    var word = words[i];
    if (word == "undefined" || word.length <= 0) {
      continue;
    }
    text_withtime = text_withtime + jsontime[char_index][0] / 1000 + ":" + word + "\n";
    // English: one timestamp per word. Otherwise: one timestamp per character.
    char_index += /^[a-zA-Z]+$/.test(word) ? 1 : word.length;
  }
  return text_withtime;
}
/**
 * WebSocket message callback: folds one recognition result into the running
 * transcript and publishes it through `videoText`.
 *
 * Offline results ("offline" / "2pass-offline") are timestamp-formatted and
 * accumulated in `offline_text`; any other mode is appended to `rec_text`
 * as-is. When the message is final, the file is made playable, the socket is
 * closed and the connect button is re-enabled.
 *
 * @param {{data: string}} jsonMsg - Message whose `data` is a JSON string with
 *   `text`, `mode`, `is_final` and `timestamp` fields.
 */
function getJsonMessage(jsonMsg) {
  const payload = JSON.parse(jsonMsg.data);
  const recognized = "" + payload['text'];
  const mode = payload['mode'];
  const offlineResult = mode == "2pass-offline" || mode == "offline";

  if (offlineResult) {
    offline_text = offline_text + handleWithTimestamp(recognized, payload['timestamp']);
    rec_text = offline_text;
  } else {
    rec_text = rec_text + recognized;
  }
  videoText = rec_text;

  if (payload['is_final'] != true) {
    return;
  }
  play_file();
  wsconnecter.wsStop();
  btnConnect.disabled = false;
}
/**
 * Connection-state callback: starts streaming the loaded file as soon as the
 * socket reports state 0. All other states are ignored.
 *
 * @param {number} connState - State code passed by the connection wrapper.
 */
function getConnState(connState) {
  if (connState !== 0) {
    return;
  }
  start_file_send();
}
/**
 * Opens the recognition connection.
 *
 * @returns {number} 1 when wsStart() reported success (1), otherwise 0.
 */
function start() {
  if (wsconnecter.wsStart() == 1) {
    return 1;
  }
  return 0;
}
/**
 * Recorder onProcess callback: resamples the newest frame to 16000 and sends
 * the audio to the server in 960-sample chunks while `isRec` is true.
 * Samples that do not fill a whole chunk stay in `sampleBuf` for the next
 * call (or for stop()).
 *
 * @param {Array} buffer - Frames recorded so far; only the last one is used.
 * @param {number} powerLevel - Unused.
 * @param {number} bufferDuration - Unused.
 * @param {number} bufferSampleRate - Sample rate of the frames in `buffer`.
 * @param {number} newBufferIdx - Unused.
 * @param {*} asyncEnd - Unused.
 */
function recProcess(buffer, powerLevel, bufferDuration, bufferSampleRate, newBufferIdx, asyncEnd) {
  if (isRec !== true) {
    return;
  }
  var latestFrame = buffer[buffer.length - 1];
  var data_16k = Recorder.SampleData([latestFrame], bufferSampleRate, 16000).data;

  // Append with typed-array copies instead of spreading both buffers into a
  // temporary plain array on every callback.
  var merged = new Int16Array(sampleBuf.length + data_16k.length);
  merged.set(sampleBuf, 0);
  merged.set(data_16k, sampleBuf.length);

  var chunk_size = 960; // for asr chunk_size [5, 10, 5]
  var offset = 0;
  while (merged.length - offset >= chunk_size) {
    // Local chunk; the old `sendBuf` was an undeclared (implicit global) name.
    wsconnecter.wsSend(merged.subarray(offset, offset + chunk_size));
    offset += chunk_size;
  }
  sampleBuf = merged.slice(offset);
}
/**
 * Value for the "itn" field of the opening request.
 * @returns {boolean} Always false.
 */
function getUseITN() {
  const useItn = false;
  return useItn;
}

86
public/textVideo/wav.js Normal file
View File

@ -0,0 +1,86 @@
/*
wav编码器+编码引擎
https://github.com/xiangyuecn/Recorder
当然最佳推荐使用mp3、wav格式代码也是优先照顾这两种格式
浏览器支持情况
https://developer.mozilla.org/en-US/docs/Web/HTML/Supported_media_formats
编码原理：给pcm数据加上一个44字节的wav头即成wav文件；pcm数据就是Recorder中的buffers原始数据（重新采样），16位时为LE小端模式（Little Endian），实质上是未经过任何编码处理
*/
(function(){
"use strict";
/** Descriptor for the "wav" encoder type. */
Recorder.prototype.enc_wav = {
  stable: true,
  testmsg: "支持位数8位、16位填在比特率里面采样率取值无限制"
};

/**
 * Wraps samples in a 44-byte mono PCM WAV header and passes the file to
 * `True` as a Blob of type "audio/wav".
 *
 * @param {ArrayLike<number>} res - Input samples (treated as 16-bit values).
 * @param {function(Blob)} True - Success callback.
 * @param {function(string)} False - Failure callback (never invoked here).
 */
Recorder.prototype.wav = function (res, True, False) {
  var settings = this.set;
  var sampleCount = res.length;
  var rate = settings.sampleRate;
  // Anything other than an explicit 8 is encoded as 16-bit.
  var bits = settings.bitRate == 8 ? 8 : 16;
  var bytesPerSample = bits / 8;
  var payloadBytes = sampleCount * bytesPerSample;

  var view = new DataView(new ArrayBuffer(44 + payloadBytes));
  var cursor = 0;

  // Header fields in file order; all integers are little-endian.
  var header = [
    ["ascii", "RIFF"],               // RIFF identifier
    ["u32", 36 + payloadBytes],      // RIFF chunk length
    ["ascii", "WAVE"],               // RIFF type
    ["ascii", "fmt "],               // format chunk identifier
    ["u32", 16],                     // format chunk length
    ["u16", 1],                      // sample format (raw)
    ["u16", 1],                      // channel count
    ["u32", rate],                   // sample rate
    ["u32", rate * bytesPerSample],  // byte rate (sample rate * block align), 1 channel
    ["u16", bytesPerSample],         // block align (channels * bytes per sample), 1 channel
    ["u16", bits],                   // bits per sample
    ["ascii", "data"],               // data chunk identifier
    ["u32", payloadBytes]            // data chunk length
  ];
  header.forEach(function (field) {
    var kind = field[0], value = field[1];
    if (kind === "ascii") {
      for (var c = 0; c < value.length; c++) {
        view.setUint8(cursor++, value.charCodeAt(c));
      }
    } else if (kind === "u16") {
      view.setUint16(cursor, value, true);
      cursor += 2;
    } else {
      view.setUint32(cursor, value, true);
      cursor += 4;
    }
  });

  // Sample data.
  var idx;
  if (bits == 8) {
    for (idx = 0; idx < sampleCount; idx++, cursor++) {
      // 16 -> 8 bit: keep the high byte and shift it into the unsigned range.
      view.setInt8(cursor, (res[idx] >> 8) + 128);
    }
  } else {
    for (idx = 0; idx < sampleCount; idx++, cursor += 2) {
      view.setInt16(cursor, res[idx], true);
    }
  }

  True(new Blob([view.buffer], { type: "audio/wav" }));
};
})();

View File

@ -0,0 +1,119 @@
/**
* Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights
* Reserved. MIT License (https://opensource.org/licenses/MIT)
*/
/* 2021-2023 by zhaoming,mali aihealthx.com */
/**
 * WebSocket wrapper used to talk to the ASR server.
 *
 * @param {Object} config
 * @param {function(MessageEvent)} config.msgHandle - Called with every
 *   message event received from the server.
 * @param {function(number)} config.stateHandle - Called with 0 after the
 *   socket opened and the opening request was sent, 1 when it closed, 2 on
 *   error.
 * @param {string} [config.uri] - Server address (ws:// or wss://). Defaults
 *   to the previously hard-coded address.
 *
 * Exposes `wsStart()`, `wsStop()` and `wsSend(data)` on the instance.
 */
function WebSocketConnectMethod( config ) {
  var DEFAULT_URI = 'ws://192.168.0.232:10095'; // e.g. wss://X.X.X.X:port/wss/ for a TLS endpoint
  var speechSokt;
  var msgHandle = config.msgHandle;
  var stateHandle = config.stateHandle;
  var serverUri = config.uri || DEFAULT_URI;

  /**
   * Opens the connection.
   * @returns {number} 1 when the socket was created, 0 when the address is
   *   not a ws/wss URI or the browser has no WebSocket support.
   */
  this.wsStart = function () {
    var Uri = serverUri;
    if (/^wss?:\/\/\S+/.test(Uri)) {
      console.log("Uri" + Uri);
    } else {
      alert("请检查wss地址正确性");
      return 0;
    }

    if ('WebSocket' in window) {
      speechSokt = new WebSocket(Uri);
      speechSokt.onopen = function (e) { onOpen(e); };
      speechSokt.onclose = function (e) {
        console.log("onclose ws!");
        onClose(e);
      };
      speechSokt.onmessage = function (e) { onMessage(e); };
      speechSokt.onerror = function (e) { onError(e); };
      return 1;
    }
    alert('当前浏览器不支持 WebSocket');
    return 0;
  };

  /** Closes the socket if one was created. */
  this.wsStop = function () {
    if (speechSokt != undefined) {
      console.log("stop ws!");
      speechSokt.close();
    }
  };

  /**
   * Sends data when the socket is open; silently drops it otherwise.
   * @param {string|ArrayBuffer|ArrayBufferView} oneData
   */
  this.wsSend = function ( oneData ) {
    if (speechSokt == undefined) return;
    if (speechSokt.readyState === 1) { // 0:CONNECTING, 1:OPEN, 2:CLOSING, 3:CLOSED
      speechSokt.send(oneData);
    }
  };

  // Socket opened: send the opening JSON request, then report state 0.
  function onOpen( e ) {
    var request = {
      "chunk_size": [5, 10, 5],
      "wav_name": "h5",
      "is_speaking": true,
      "chunk_interval": 10,
      "itn": getUseITN(),
      "mode": getAsrMode(),
    };
    if (isfilemode) {
      request.wav_format = file_ext;
      if (file_ext == "wav") {
        // wav files are announced as PCM together with their sample rate.
        request.wav_format = "PCM";
        request.audio_fs = file_sample_rate;
      }
    }
    var hotwords = getHotwords();
    if (hotwords != null) {
      request.hotwords = hotwords;
    }
    console.log(JSON.stringify(request));
    speechSokt.send(JSON.stringify(request));
    console.log("连接成功");
    stateHandle(0);
  }

  function onClose( e ) {
    stateHandle(1);
  }

  function onMessage( e ) {
    msgHandle(e);
  }

  function onError( e ) {
    // info_div is an optional page element; when the page does not provide it
    // the unguarded write threw a ReferenceError and state 2 was never reported.
    if (typeof info_div !== "undefined" && info_div) {
      info_div.innerHTML = "连接" + e;
    }
    console.log(e);
    stateHandle(2);
  }
}