2025-07-16 18:33:13 +08:00
|
|
|
|
<template>
  <div>
    <!--
      `modelValue` is a prop and therefore read-only, so it is bound one-way here.
      Every close path goes through `handleClose`, which emits `update:modelValue`.
    -->
    <el-dialog
      :model-value="modelValue"
      title="文件解析"
      width="1000px"
      :show-close="true"
      :center="true"
      :close-on-click-modal="false"
      :before-close="handleClose"
    >
      <div class="flex align-center">
        <h3>提取文件类型:</h3>
        <el-radio-group v-model="active" @change="changeRadio">
          <el-radio :label="'文件解析'">文件解析</el-radio>
          <el-radio :label="'图片解析'">图片解析</el-radio>
        </el-radio-group>
      </div>

      <!-- File parsing tab: wired up imperatively by id in the onMounted hook -->
      <div v-show="active === '文件解析'">
        <h1>文件文本提取工具</h1>
        <p>上传文件提取文本内容(支持 .txt, .pdf, .docx, mp4 , mp3, wav)</p>

        <div class="container">
          <!-- Extensions in `accept` must be bare, comma-separated tokens (no quotes) -->
          <input type="file" id="file-input" accept=".txt,.pdf,.docx,.mp4,.mp3,.wav" />
          <button @click="chooseFile">选择文件</button>
          <p id="file-info">未选择文件</p>
        </div>

        <button id="extract-btn" disabled>提取文本</button>

        <h3>提取结果:</h3>
        <div id="result">请先上传文件...</div>
      </div>

      <!-- Image parsing tab -->
      <div v-show="active === '图片解析'" v-loading="loading" element-loading-text="模型加载中......">
        <div class="flex align-center just-between">
          <h1>文件文本提取工具</h1>
          <span title="刷新" class="pointer">
            <el-icon color="#0072ff" size="30px" @click="initPage"><RefreshRight /></el-icon>
            <el-icon color="#23c044" size="14px" v-if="hasLoadingJS"><CircleCheckFilled /></el-icon>
            <el-icon color="#e60e0e" size="14px" v-if="!hasLoadingJS"><CircleCloseFilled /></el-icon>
          </span>
        </div>
        <p>上传文件提取文本内容(支持 .png, .jpg )</p>
        <div class="container flex" style="height: 248px;">
          <div class="mr10">
            <el-upload class="upload-demo" action="abc" :auto-upload="false" :on-change="onHandleChange" :show-file-list="false">
              <el-button size="medium" type="primary">上传图片</el-button>
            </el-upload>
            <!--
              NOTE(review): this id duplicates the paragraph in the file tab. It is kept so the
              `#file-info` style still applies, but `document.getElementById("file-info")`
              only ever resolves to the first occurrence.
            -->
            <p id="file-info">{{ files.name || '未选择文件' }} </p>
          </div>
          <div class="box">
            <div class="imd">
              <!-- Visible preview -->
              <img :src="image" v-if="image" style="width: 340px; max-height: 200px"/>
              <!-- Hidden full-size copy handed to the OCR call through `imageRef` -->
              <img :src="image" ref="imageRef" v-show="false" />
            </div>
            <div class="imd" v-show="false">
              <canvas ref="canvasRef"></canvas>
            </div>
          </div>
        </div>
        <h3>提取结果:</h3>
        <div class="textModel noScollLine" v-loading="linadingImg" element-loading-text="图片解析中......">
          <p v-if="texts.length == 0">{{ alertText }}</p>
          <template v-else>
            <p v-for="(text, index) in texts" :key="index">{{ text }}</p>
          </template>
        </div>
      </div>

      <template #footer>
        <el-button @click="handleClose">取消</el-button>
        <el-button type="primary" @click="onComfirm">确认</el-button>
      </template>
    </el-dialog>
  </div>
</template>
|
|
|
|
|
|
|
|
|
|
<script setup>
|
2025-07-17 17:29:41 +08:00
|
|
|
|
import * as ocr from "@paddlejs-models/ocr";
|
|
|
|
|
import { drawBox } from "@/utils/ocrUtils";
|
|
|
|
|
import { nextTick, onMounted,reactive, ref,getCurrentInstance } from "vue";
|
2025-07-16 18:33:13 +08:00
|
|
|
|
// ---- Component contract ----------------------------------------------------
// `modelValue` is the dialog visibility flag supplied by the parent.
const props = defineProps({
  modelValue: { type: Boolean, default: false },
});
// `update:modelValue` is emitted on close; `change` carries the extracted text.
const emits = defineEmits(["update:modelValue", "change"]);

// Component proxy; used below to call `proxy.$message` for toast notifications.
const { proxy } = getCurrentInstance();

// ---- Tab state ---------------------------------------------------------------
// Selected radio value: '文件解析' (file tab) or '图片解析' (image tab).
const active = ref('文件解析');

// ---- Image tab state ---------------------------------------------------------
// File object most recently passed to the upload change handler.
const files = ref({});
// Preview URL bound to both <img> elements.
const image = ref('');
// Lines of recognised text rendered in the result panel.
const texts = ref([]);
// Placeholder shown in the result panel while `texts` is empty.
const alertText = ref('请先上传文件...');
// Overlay flag for the whole image tab while the OCR model initialises.
const loading = ref(true);
// Overlay flag for the result panel while an image is being recognised.
const linadingImg = ref(false);
// Set to true once OCR initialisation has succeeded.
const hasLoadingJS = ref(false);

// Template refs: the hidden <img> and <canvas> elements.
const imageRef = ref();
const canvasRef = ref();

// Only `width` is ever written (after recognition); nothing in this file reads it.
const textStyle = reactive({
  width: "",
  height: "",
});
|
|
|
|
|
|
2025-07-16 18:33:13 +08:00
|
|
|
|
/**
 * Mount hook: starts OCR initialisation and wires the file tab's plain DOM
 * controls (`#file-input`, `#file-info`, `#extract-btn`, `#result`).
 *
 * NOTE(review): `upfileOnchange`, `start` and `videoText` are not declared in the
 * visible part of this file — presumably globals provided elsewhere; confirm.
 */
onMounted(() => {
  initPage();

  nextTick(() => {
    const fileInput = document.getElementById("file-input");
    const fileInfo = document.getElementById("file-info");
    const extractBtn = document.getElementById("extract-btn");
    const resultDiv = document.getElementById("result");

    // Without these nodes there is nothing to wire up; bail out instead of throwing.
    if (!fileInput || !fileInfo || !extractBtn || !resultDiv) {
      console.warn("File-parsing controls are not in the DOM; listeners were not attached.");
      return;
    }

    let selectedFile = null;

    // Track the chosen file and toggle the extract button accordingly.
    fileInput.addEventListener("change", (e) => {
      const file = e.target.files[0];

      if (!file) {
        // The picker was cancelled: reset and stop here. Nothing below may touch
        // `selectedFile`, which is null on this path.
        selectedFile = null;
        fileInfo.textContent = "未选择文件";
        extractBtn.disabled = true;
        return;
      }

      selectedFile = file;
      fileInfo.textContent = `已选择: ${file.name} (${(file.size / 1024).toFixed(2)} KB)`;
      extractBtn.disabled = false;

      if (file.type === "video/mp4") {
        upfileOnchange(file);
      }
    });

    // Extract text from the chosen file, dispatching on its extension.
    extractBtn.addEventListener("click", async () => {
      if (!selectedFile) {
        resultDiv.textContent = "请先选择文件";
        return;
      }

      resultDiv.textContent = "正在处理文件...";

      try {
        let text = "";
        const fileType = selectedFile.name.split(".").pop().toLowerCase();

        switch (fileType) {
          case "txt":
            text = await readTextFile(selectedFile);
            break;
          case "pdf":
            text = await extractTextFromPDF(selectedFile);
            break;
          case "docx":
            text = await extractTextFromDocx(selectedFile);
            break;
          case "mp4":
          case "mp3":
          case "wav":
            await start();
            // Show a "please wait" notice now, then replace it with `videoText`
            // two seconds later.
            text = "数据加载有点慢,请稍等。。。。";
            setTimeout(() => {
              resultDiv.textContent = videoText;
            }, 2000);
            break;
          default:
            throw new Error("不支持的文件类型");
        }

        resultDiv.textContent = text || "未提取到文本内容";
      } catch (error) {
        resultDiv.textContent = `处理失败: ${error.message}`;
        console.error(error);
      }
    });
  });
});
|
|
|
|
|
|
|
|
|
|
/**
 * Reads a file's contents as text using FileReader.
 *
 * @param {Blob} file - File (or Blob) to read.
 * @returns {Promise<string>} Resolves with the text content.
 * @throws {Error} Rejects with "文件读取失败"; the underlying FileReader error is
 *   attached as `cause`.
 */
function readTextFile(file) {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();
    reader.onload = () => resolve(reader.result);
    // Keep the original failure reachable for debugging instead of discarding it.
    reader.onerror = () => reject(new Error("文件读取失败", { cause: reader.error }));
    reader.readAsText(file);
  });
}
|
|
|
|
|
|
|
|
|
|
/**
 * Extracts the text of every page of a PDF.
 *
 * The file is read into memory with FileReader and handed to `pdfjsLib`
 * (not imported in this file — presumably a global; confirm). The text items of
 * each page are joined with a space and each page is terminated by a blank line.
 *
 * @param {Blob} file - PDF file to read.
 * @returns {Promise<string>} Resolves with the concatenated page text.
 * @throws {Error} Rejects when the file cannot be read or pdf.js fails.
 */
function extractTextFromPDF(file) {
  return new Promise((resolve, reject) => {
    const fileReader = new FileReader();

    fileReader.onload = async () => {
      try {
        const pdf = await pdfjsLib.getDocument(new Uint8Array(fileReader.result)).promise;
        let fullText = "";

        // pdf.js page numbers are 1-based.
        for (let pageNumber = 1; pageNumber <= pdf.numPages; pageNumber++) {
          const page = await pdf.getPage(pageNumber);
          const { items } = await page.getTextContent();
          fullText += `${items.map((item) => item.str).join(" ")}\n\n`;
        }

        resolve(fullText);
      } catch (error) {
        reject(error);
      }
    };

    // `onerror` receives a ProgressEvent, which has no `message`; reject with a real
    // Error so callers can display `error.message`.
    fileReader.onerror = () =>
      reject(new Error("文件读取失败", { cause: fileReader.error }));

    fileReader.readAsArrayBuffer(file);
  });
}
|
|
|
|
|
|
|
|
|
|
/**
 * Extracts the raw text of a .docx document.
 *
 * The file is read into memory with FileReader and passed to
 * `mammoth.extractRawText` (`mammoth` is not imported in this file — presumably a
 * global; confirm).
 *
 * @param {Blob} file - Word document to read.
 * @returns {Promise<string>} Resolves with the `value` field of mammoth's result.
 * @throws {Error} Rejects when the file cannot be read or mammoth fails.
 */
function extractTextFromDocx(file) {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();

    reader.onload = async () => {
      // try/catch also covers a synchronous throw from mammoth, which would
      // otherwise leave this promise pending forever.
      try {
        const { value } = await mammoth.extractRawText({ arrayBuffer: reader.result });
        resolve(value);
      } catch (error) {
        reject(error);
      }
    };

    // `onerror` receives a ProgressEvent, which has no `message`; reject with a real
    // Error so callers can display `error.message`.
    reader.onerror = () => reject(new Error("文件读取失败", { cause: reader.error }));

    reader.readAsArrayBuffer(file);
  });
}
|
|
|
|
|
|
2025-07-17 17:29:41 +08:00
|
|
|
|
/**
 * Initialises the OCR model for the image tab.
 *
 * Shows the tab's loading overlay while `ocr.init()` runs, reports the outcome
 * with a toast and records it in `hasLoadingJS`. Never rejects: failures are
 * logged and surfaced to the user.
 *
 * @author PengShuai
 * @returns {Promise<void>}
 */
const initPage = async () => {
  loading.value = true;
  try {
    await ocr.init();
    proxy.$message({ type: "success", message: "加载成功" });
    hasLoadingJS.value = true;
  } catch (err) {
    // Keep the cause visible; the toast alone says nothing about why it failed.
    console.error(err);
    proxy.$message({ type: "error", message: "加载失败,请刷新页面" });
    hasLoadingJS.value = false;
  } finally {
    loading.value = false;
  }
}
|
|
|
|
|
/**
 * Upload change handler for the image tab.
 *
 * Stores the selected file, points the preview at a fresh object URL, switches the
 * result panel into its "parsing" state and schedules recognition.
 *
 * @author PengShuai
 * @param {{ name: string, raw: Blob }} file - Upload entry; `raw` is the selected file.
 */
const onHandleChange = (file) => {
  // Release the previous preview's object URL so repeated uploads do not leak blobs.
  if (image.value) {
    URL.revokeObjectURL(image.value);
  }

  files.value = file;
  image.value = URL.createObjectURL(file.raw);
  linadingImg.value = true;
  alertText.value = '图片文件解析中。。。';

  // NOTE(review): the 600 ms delay presumably gives the hidden <img> time to load
  // the new source before it is handed to the OCR call — confirm.
  setTimeout(() => {
    getRecognize().catch((err) => {
      // Never leave the spinner running if recognition rejects.
      console.error(err);
      linadingImg.value = false;
      alertText.value = '解析失败,请选择清晰一点的图片重试!';
    });
  }, 600);
}
|
|
|
|
|
/**
 * Runs OCR on the hidden <img> element and publishes the recognised text.
 *
 * On success `texts` receives the recognised text; on failure the error is logged
 * and `texts` is cleared. In both cases the spinner is cleared and `alertText` is
 * set to the failure message, which the template only shows while `texts` is empty.
 *
 * @returns {Promise<void>} Never rejects.
 */
const getRecognize = async () => {
  // Local names avoid shadowing the module-level `image` ref.
  const imageEl = imageRef.value;
  const canvasEl = canvasRef.value;

  try {
    const { text, points } = await ocr.recognize(imageEl);
    drawBox(points, imageEl, canvasEl);
    textStyle.width = imageEl.width - 40 + "px";
    texts.value = text;
  } catch (err) {
    console.error(err);
    // Drop any result from a previous image so the failure message is shown.
    texts.value = [];
  } finally {
    linadingImg.value = false;
    alertText.value = '解析失败,请选择清晰一点的图片重试!';
  }
}
|
|
|
|
|
|
|
|
|
|
/**
 * Radio-group change handler: resets both tabs to their initial display state.
 *
 * Switching to the image tab also retries OCR initialisation if it has not
 * succeeded yet.
 *
 * @param {string} val - Newly selected radio value.
 */
const changeRadio = (val) => {
  if (val === '图片解析' && !hasLoadingJS.value) {
    // Not awaited on purpose: initPage reports its own outcome.
    initPage();
  }

  // File tab: back to the same placeholders the template starts with.
  document.getElementById("result").textContent = "请先上传文件...";
  document.getElementById("file-info").textContent = "未选择文件";

  // Image tab.
  files.value = {};
  alertText.value = '请先上传文件...';
  texts.value = [];
}
|
|
|
|
|
/**
 * Confirm button handler: emits `change` with the extracted text of the active tab.
 *
 * - File tab: `{ text }` is the current text content of the `#result` element.
 * - Image tab: `{ text }` is the `texts` array of recognised lines.
 */
const onComfirm = () => {
  // `active` is a ref; compare its value, not the ref object itself.
  if (active.value === '文件解析') {
    const resultDiv = document.getElementById("result");
    emits("change", { text: resultDiv.textContent });
    return;
  }

  emits("change", { text: texts.value });
};
|
|
|
|
|
|
|
|
|
|
/**
 * Closes the dialog: restores the file tab's placeholders and asks the parent to
 * hide the dialog by emitting `update:modelValue` with `false`.
 */
const handleClose = () => {
  // Same placeholder text the template starts with (including the trailing dots).
  document.getElementById("result").textContent = "请先上传文件...";
  document.getElementById("file-info").textContent = "未选择文件";
  emits("update:modelValue", false);
};
|
|
|
|
|
|
|
|
|
|
/**
 * Opens the native file picker by forwarding the click to the hidden
 * `#file-input` element.
 */
const chooseFile = () => {
  const hiddenInput = document.getElementById("file-input");
  hiddenInput.click();
};
|
|
|
|
|
</script>
|
|
|
|
|
|
|
|
|
|
<style lang="scss" scoped>
// Dashed drop-zone style frame around the upload controls.
.container {
  border: 2px dashed #ccc;
  padding: 20px;
  text-align: center;
  margin-bottom: 20px;
}

// The native input stays hidden; it is triggered through the "选择文件" button.
#file-input {
  display: none;
}

button {
  background: #0072ff;
  color: white;
  padding: 10px 15px;
  border: none;
  cursor: pointer;
  font-size: 16px;
  border-radius: 4px;
}

// NOTE(review): same colour as the base rule, so hovering changes nothing visually.
button:hover {
  background: #0072ff;
}

// Scrollable result panel of the file tab.
#result {
  margin-top: 20px;
  white-space: pre-wrap;
  background: #f9f9f9;
  padding: 15px;
  border: 1px solid #ddd;
  border-radius: 4px;
  height: 270px;
  overflow-y: auto;
}

#file-info {
  margin: 10px 0;
  font-style: italic;
  color: #666;
}

// `:deep()` replaces the deprecated `::v-deep` combinator for reaching into the
// child component's markup from a scoped style block.
:deep(.el-dialog) {
  margin-top: 10px;
}

// Image preview and canvas sit side by side.
.box {
  display: flex;

  .imd {
    flex: 1;
  }
}

// Scrollable result panel of the image tab.
.textModel {
  margin-top: 20px;
  white-space: pre-wrap;
  background: #f9f9f9;
  padding: 15px;
  border: 1px solid #ddd;
  border-radius: 4px;
  height: 195px;
  overflow-y: auto;
}
</style>
|