374 lines
11 KiB
Vue
374 lines
11 KiB
Vue
<template>
|
||
<div>
|
||
<el-dialog v-model="modelValue" title="文件解析" width="1000px" :show-close="true" :center="true" :close-on-click-modal="false" :before-close="handleClose" >
|
||
<div class="flex align-center">
|
||
<h3>提取文件类型:</h3>
|
||
<el-radio-group v-model="active" @change="changeRadio">
|
||
<el-radio :label="'文件解析'">文件解析</el-radio>
|
||
<el-radio :label="'图片解析'">图片解析</el-radio>
|
||
</el-radio-group>
|
||
</div>
|
||
<!-- 文件解析 -->
|
||
<div v-show="active == '文件解析'">
|
||
<h1>文件文本提取工具</h1>
|
||
<p>上传文件提取文本内容(支持 .txt, .pdf, .docx, mp4 , mp3, wav)</p>
|
||
|
||
<div class="container">
|
||
<!-- Native file picker (hidden via #file-input in the scoped styles); `accept` mirrors the extensions handled by the extract button. -->
<input type="file" id="file-input" accept=".txt,.pdf,.docx,.mp4,.mp3,.wav"/>
|
||
<button @click="chooseFile">选择文件</button>
|
||
<p id="file-info">未选择文件</p>
|
||
</div>
|
||
|
||
<button id="extract-btn" disabled>提取文本</button>
|
||
|
||
<h3>提取结果:</h3>
|
||
<div id="result">请先上传文件...</div>
|
||
</div>
|
||
<!-- 图片解析 -->
|
||
<div v-show="active == '图片解析'" v-loading="loading" element-loading-text="模型加载中......">
|
||
<div class="flex align-center just-between">
|
||
<h1>文件文本提取工具</h1>
|
||
<span title="刷新" class="pointer" >
|
||
<el-icon color="#0072ff" size="30px" @click="initPage"><RefreshRight /></el-icon>
|
||
<el-icon color="#23c044" size="14px" v-if="hasLoadingJS"><CircleCheckFilled /></el-icon>
|
||
<el-icon color="#e60e0e" size="14px" v-if="!hasLoadingJS"><CircleCloseFilled /></el-icon>
|
||
</span>
|
||
</div>
|
||
<p>上传文件提取文本内容(支持 .png, .jpg )</p>
|
||
<div class="container flex" style="height: 248px;">
|
||
<div class="mr10">
|
||
<el-upload class="upload-demo" action="abc" :auto-upload="false" :on-change="onHandleChange" :show-file-list="false">
|
||
<el-button size="medium" type="primary">上传图片</el-button>
|
||
</el-upload>
|
||
<p id="file-info">{{ files.name || '未选择文件' }} </p>
|
||
</div>
|
||
<div class="box">
|
||
<div class="imd">
|
||
<img :src="image" v-if="image" style="width: 340px; max-height: 200px"/>
|
||
<img :src="image" ref="imageRef" v-show="false" />
|
||
</div>
|
||
<div class="imd" v-show="false">
|
||
<canvas ref="canvasRef"></canvas>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
<h3>提取结果:</h3>
|
||
<div class="textModel noScollLine" v-loading="linadingImg" element-loading-text="图片解析中......">
|
||
<p v-if="texts.length == 0">{{ alertText }}</p>
|
||
<template v-else>
|
||
<p v-for="(text, index) in texts" :key="index">{{ text }}</p>
|
||
</template>
|
||
</div>
|
||
</div>
|
||
<template #footer>
|
||
<el-button @click="handleClose">取消</el-button>
|
||
<el-button type="primary" @click="onComfirm">确认</el-button>
|
||
</template>
|
||
</el-dialog>
|
||
</div>
|
||
</template>
|
||
|
||
<script setup>
|
||
import * as ocr from "@paddlejs-models/ocr";
|
||
import { drawBox } from "@/utils/ocrUtils";
|
||
import { nextTick, onMounted,reactive, ref,getCurrentInstance } from "vue";
|
||
// Props: `modelValue` is the boolean the template binds to the dialog's v-model.
const props = defineProps({
  modelValue: { type: Boolean, default: false },
});
|
||
// Component proxy; used below to call `$message`.
const { proxy } = getCurrentInstance();

// Events: `update:modelValue` (emitted with false on close) and `change` (extracted text payload).
const emits = defineEmits(["update:modelValue", "change"]);

// Selected extraction mode; the values match the radio labels in the template.
const active = ref('文件解析');

// ---- Image (OCR) tab state ----
// Last upload entry received by the el-upload change hook.
const files = ref({});
// True while the OCR model is initializing.
const loading = ref(true);
// True while an uploaded image is being recognized.
const linadingImg = ref(false);
// Object URL of the uploaded image ('' when none).
const image = ref('');
// Message rendered while `texts` is empty.
const alertText = ref('请先上传文件...');
// Recognized text lines.
const texts = ref([]);
// Template refs: hidden <img> handed to the OCR model and the canvas passed to drawBox.
const imageRef = ref();
const canvasRef = ref();
// Size holder written after recognition.
const textStyle = reactive({
  width: "",
  height: "",
});
// Whether the OCR model finished loading successfully.
const hasLoadingJS = ref(false);
|
||
|
||
/**
 * On mount, start loading the OCR model and attach the native DOM listeners
 * that drive the "文件解析" tab: the hidden file input and the "提取文本" button.
 */
onMounted(() => {
  initPage();

  nextTick(() => {
    const fileInput = document.getElementById("file-input");
    const fileInfo = document.getElementById("file-info");
    const extractBtn = document.getElementById("extract-btn");
    const resultDiv = document.getElementById("result");

    // Bail out instead of throwing when the dialog body is not in the DOM yet.
    if (!fileInput || !fileInfo || !extractBtn || !resultDiv) {
      console.warn("file-tab elements not found; file listeners were not attached");
      return;
    }

    // File currently chosen in the picker; shared by both listeners below.
    let selectedFile = null;

    // Track the chosen file and mirror it in the info line / button state.
    fileInput.addEventListener("change", (e) => {
      selectedFile = e.target.files[0] ?? null;

      if (!selectedFile) {
        // Picker was cancelled or cleared: reset the UI and stop here so that
        // nothing below dereferences a null file.
        fileInfo.textContent = "未选择文件";
        extractBtn.disabled = true;
        return;
      }

      const sizeKb = (selectedFile.size / 1024).toFixed(2);
      fileInfo.textContent = `已选择: ${selectedFile.name} (${sizeKb} KB)`;
      extractBtn.disabled = false;

      if (selectedFile.type === "video/mp4") {
        // NOTE(review): `upfileOnchange` is not defined in the visible script;
        // presumably a global provided elsewhere — confirm.
        upfileOnchange(selectedFile);
      }
    });

    // Extract text from the chosen file, dispatching on its extension.
    extractBtn.addEventListener("click", async () => {
      if (!selectedFile) {
        resultDiv.textContent = "请先选择文件";
        return;
      }

      resultDiv.textContent = "正在处理文件...";

      try {
        const fileType = selectedFile.name.split(".").pop().toLowerCase();
        let text = "";

        if (fileType === "txt") {
          text = await readTextFile(selectedFile);
        } else if (fileType === "pdf") {
          text = await extractTextFromPDF(selectedFile);
        } else if (fileType === "docx") {
          text = await extractTextFromDocx(selectedFile);
        } else if (["mp4", "mp3", "wav"].includes(fileType)) {
          // NOTE(review): `start` and `videoText` are not defined in the
          // visible script; presumably globals provided elsewhere — confirm.
          await start();
          // Show a waiting message now; the transcript replaces it after 2 s.
          text = "数据加载有点慢,请稍等。。。。";
          setTimeout(() => {
            resultDiv.textContent = videoText;
          }, 2000);
        } else {
          throw new Error("不支持的文件类型");
        }

        resultDiv.textContent = text || "未提取到文本内容";
      } catch (error) {
        resultDiv.textContent = `处理失败: ${error.message}`;
        console.error(error);
      }
    });
  });
});
|
||
|
||
/**
 * Read a file as text using FileReader.
 *
 * @param {Blob} file - File (or Blob) to read.
 * @returns {Promise<string>} Resolves with the file's text content.
 * @throws {Error} Rejects with "文件读取失败"; the reader's own error is kept as `cause`.
 */
function readTextFile(file) {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();
    reader.onload = () => resolve(reader.result);
    // Keep the underlying reader error so the failure stays diagnosable.
    reader.onerror = () => reject(new Error("文件读取失败", { cause: reader.error }));
    reader.readAsText(file);
  });
}
|
||
|
||
/**
 * Extract the text of every page of a PDF file.
 *
 * Uses the `pdfjsLib` global (not imported in this file). The text items of
 * each page are joined with a space and each page is followed by a blank line.
 *
 * @param {Blob} file - PDF file to read.
 * @returns {Promise<string>} Concatenated text of all pages.
 * @throws {Error} Rejects when the file cannot be read or pdf.js fails.
 */
async function extractTextFromPDF(file) {
  // Blob.arrayBuffer() rejects with a real error object, so callers can rely
  // on `error.message` (FileReader's onerror only hands over a ProgressEvent).
  const bytes = new Uint8Array(await file.arrayBuffer());
  const pdf = await pdfjsLib.getDocument(bytes).promise;

  let fullText = "";
  // pdf.js page numbers are 1-based.
  for (let pageNo = 1; pageNo <= pdf.numPages; pageNo++) {
    const page = await pdf.getPage(pageNo);
    const { items } = await page.getTextContent();
    fullText += `${items.map((item) => item.str).join(" ")}\n\n`;
  }
  return fullText;
}
|
||
|
||
/**
 * Extract the raw text of a .docx file.
 *
 * Uses the `mammoth` global (not imported in this file).
 *
 * @param {Blob} file - Word document to read.
 * @returns {Promise<string>} The `value` field of mammoth's extractRawText result.
 * @throws {Error} Rejects when the file cannot be read or mammoth fails.
 */
async function extractTextFromDocx(file) {
  // Blob.arrayBuffer() rejects with a real error object, so callers can rely
  // on `error.message` (FileReader's onerror only hands over a ProgressEvent).
  const arrayBuffer = await file.arrayBuffer();
  const { value } = await mammoth.extractRawText({ arrayBuffer });
  return value;
}
|
||
|
||
/**
 * Initialize the OCR model for the image tab.
 *
 * Sets `loading` while `ocr.init()` runs, records the outcome in
 * `hasLoadingJS`, and reports success or failure through `$message`.
 *
 * @returns {Promise<void>} Never rejects; failures are logged and shown to the user.
 */
const initPage = async () => {
  loading.value = true;
  try {
    await ocr.init();
    proxy.$message({ type: "success", message: "加载成功" });
    hasLoadingJS.value = true;
  } catch (err) {
    // Log the cause instead of discarding it; the toast alone hides why it failed.
    console.error(err);
    proxy.$message({ type: "error", message: "加载失败,请刷新页面" });
    hasLoadingJS.value = false;
  } finally {
    loading.value = false;
  }
};
|
||
/**
 * el-upload change hook: preview the chosen image and schedule recognition.
 *
 * @param {object} file - Upload entry; `file.raw` is passed to URL.createObjectURL.
 */
const onHandleChange = (file) => {
  // Release the previous preview URL so repeated uploads do not leak blobs.
  if (image.value) {
    URL.revokeObjectURL(image.value);
  }

  files.value = file;
  image.value = URL.createObjectURL(file.raw);
  linadingImg.value = true;
  alertText.value = '图片文件解析中。。。';

  // Recognition is deferred by 600 ms — presumably so the hidden <img> has
  // picked up the new src before it is read; confirm.
  setTimeout(() => {
    getRecognize();
  }, 600);
};
|
||
/**
 * Run OCR on the hidden <img>, draw the detected boxes on the canvas and
 * publish the recognized lines to `texts`.
 *
 * @returns {Promise<void>} Never rejects; on failure `texts` is cleared so the
 *   failure message in `alertText` is shown.
 */
const getRecognize = async () => {
  // Named imgEl/canvasEl so the outer `image` ref is not shadowed.
  const imgEl = imageRef.value;
  const canvasEl = canvasRef.value;

  try {
    const { text, points } = await ocr.recognize(imgEl);
    drawBox(points, imgEl, canvasEl);
    textStyle.width = `${imgEl.width - 40}px`;
    // The template reads texts.length, so never store a nullish value.
    texts.value = text ?? [];
  } catch (err) {
    console.error(err);
    texts.value = [];
  } finally {
    // Always clear the spinner; a rejected recognize() used to leave it on.
    linadingImg.value = false;
    // Only rendered while `texts` is empty, i.e. when nothing was recognized.
    alertText.value = '解析失败,请选择清晰一点的图片重试!';
  }
};
|
||
|
||
/**
 * Radio change handler: reset both tabs to their initial state and, when
 * switching to the image tab, load the OCR model if it is not loaded yet.
 *
 * @param {string} val - Newly selected radio label.
 */
const changeRadio = (val) => {
  if (val === '图片解析' && !hasLoadingJS.value) {
    initPage();
  }

  // File tab: restore the same placeholders the template starts with.
  document.getElementById("result").textContent = "请先上传文件...";
  document.getElementById("file-info").textContent = "未选择文件";

  // Image tab: drop the previous upload and its results.
  files.value = {};
  alertText.value = '请先上传文件...';
  texts.value = [];
  image.value = '';
};
|
||
/**
 * Confirm handler: emit `change` with the text extracted in the active tab.
 *
 * File mode emits the text content of the #result element; image mode emits
 * the array of recognized lines.
 */
const onComfirm = () => {
  // `active` is a ref: compare its value. Comparing the ref object itself is
  // always false, which sent file-mode confirmations down the image branch.
  if (active.value === '文件解析') {
    const resultDiv = document.getElementById("result");
    emits("change", { text: resultDiv.textContent });
    return;
  }
  emits("change", { text: texts.value });
};
|
||
|
||
/**
 * Close handler (dialog before-close hook and the cancel button): reset the
 * file tab placeholders and ask the parent to close via `update:modelValue`.
 */
const handleClose = () => {
  // Same placeholders the template starts with, so a reopened dialog looks fresh.
  document.getElementById("result").textContent = "请先上传文件...";
  document.getElementById("file-info").textContent = "未选择文件";
  emits("update:modelValue", false);
};
|
||
|
||
/**
 * Open the native file picker by clicking the #file-input element.
 */
const chooseFile = () => {
  const picker = document.getElementById("file-input");
  picker.click();
};
|
||
</script>
|
||
|
||
<style lang="scss" scoped>
|
||
.container {
|
||
border: 2px dashed #ccc;
|
||
padding: 20px;
|
||
text-align: center;
|
||
margin-bottom: 20px;
|
||
}
|
||
|
||
#file-input {
|
||
display: none;
|
||
}
|
||
|
||
button {
|
||
background: #0072ff;
|
||
color: white;
|
||
padding: 10px 15px;
|
||
border: none;
|
||
cursor: pointer;
|
||
font-size: 16px;
|
||
border-radius: 4px;
|
||
}
|
||
|
||
button:hover {
|
||
background: #0072ff;
|
||
}
|
||
|
||
#result {
|
||
margin-top: 20px;
|
||
white-space: pre-wrap;
|
||
background: #f9f9f9;
|
||
padding: 15px;
|
||
border: 1px solid #ddd;
|
||
border-radius: 4px;
|
||
height: 270px;
|
||
overflow-y: auto;
|
||
}
|
||
|
||
#file-info {
|
||
margin: 10px 0;
|
||
font-style: italic;
|
||
color: #666;
|
||
}
|
||
|
||
// Reach the dialog element from this scoped block; :deep() replaces the
// deprecated ::v-deep combinator form.
:deep(.el-dialog) {
  margin-top: 10px;
}
|
||
|
||
|
||
.box{
|
||
display: flex;
|
||
.imd{
|
||
flex: 1;
|
||
}
|
||
}
|
||
|
||
.textModel{
|
||
margin-top: 20px;
|
||
white-space: pre-wrap;
|
||
background: #f9f9f9;
|
||
padding: 15px;
|
||
border: 1px solid #ddd;
|
||
border-radius: 4px;
|
||
height: 195px;
|
||
overflow-y: auto;
|
||
}
|
||
</style> |