<!-- sgxt_web/src/components/ExtractionText/index.vue -->
<template>
  <div>
    <!--
      Extraction dialog. `modelValue` is a read-only prop, so it is bound one-way;
      every close request goes through handleClose, which emits update:modelValue.
    -->
    <el-dialog :model-value="modelValue" title="文件解析" width="1000px" :show-close="true" :center="true" :close-on-click-modal="false" :before-close="handleClose">
      <div class="flex align-center">
        <h3>提取文件类型</h3>
        <el-radio-group v-model="active" @change="changeRadio">
          <el-radio :label="'文件解析'">文件解析</el-radio>
          <el-radio :label="'图片解析'">图片解析</el-radio>
        </el-radio-group>
      </div>
      <!-- File parsing: these elements are driven imperatively by id from the script -->
      <div v-show="active == '文件解析'">
        <h1>文件文本提取工具</h1>
        <p>上传文件提取文本内容支持 .txt, .pdf, .docx, mp4 , mp3, wav</p>
        <div class="container">
          <!-- accept takes a plain comma-separated list; quoted extensions are not valid tokens -->
          <input type="file" id="file-input" accept=".txt,.pdf,.docx,.mp4,.mp3,.wav"/>
          <button @click="chooseFile">选择文件</button>
          <p id="file-info">未选择文件</p>
        </div>
        <button id="extract-btn" disabled>提取文本</button>
        <h3>提取结果</h3>
        <div id="result">请先上传文件...</div>
      </div>
      <!-- Image parsing (OCR) -->
      <div v-show="active == '图片解析'" v-loading="loading" element-loading-text="模型加载中......">
        <div class="flex align-center just-between">
          <h1>文件文本提取工具</h1>
          <!-- Reload the OCR model; the small icon shows whether the model is loaded -->
          <span title="刷新" class="pointer" >
            <el-icon color="#0072ff" size="30px" @click="initPage"><RefreshRight /></el-icon>
            <el-icon color="#23c044" size="14px" v-if="hasLoadingJS"><CircleCheckFilled /></el-icon>
            <el-icon color="#e60e0e" size="14px" v-if="!hasLoadingJS"><CircleCloseFilled /></el-icon>
          </span>
        </div>
        <p>上传文件提取文本内容支持 .png, .jpg </p>
        <div class="container flex" style="height: 248px;">
          <div class="mr10">
            <el-upload class="upload-demo" action="abc" :auto-upload="false" :on-change="onHandleChange" :show-file-list="false">
              <el-button size="medium" type="primary">上传图片</el-button>
            </el-upload>
            <p id="file-info">{{ files.name || '未选择文件' }} </p>
          </div>
          <div class="box">
            <div class="imd">
              <!-- Visible preview, plus a hidden full-size copy that is handed to the OCR model -->
              <img :src="image" v-if="image" style="width: 340px; max-height: 200px"/>
              <img :src="image" ref="imageRef" v-show="false" />
            </div>
            <div class="imd" v-show="false">
              <canvas ref="canvasRef"></canvas>
            </div>
          </div>
        </div>
        <h3>提取结果</h3>
        <div class="textModel noScollLine" v-loading="linadingImg" element-loading-text="图片解析中......">
          <p v-if="texts.length == 0">{{ alertText }}</p>
          <template v-else>
            <p v-for="(text, index) in texts" :key="index">{{ text }}</p>
          </template>
        </div>
      </div>
      <template #footer>
        <el-button @click="handleClose">取消</el-button>
        <el-button type="primary" @click="onComfirm">确认</el-button>
      </template>
    </el-dialog>
  </div>
</template>
<script setup>
// OCR model, box-drawing helper and the Vue composition APIs used by this component.
import * as ocr from "@paddlejs-models/ocr";
import { drawBox } from "@/utils/ocrUtils";
import { nextTick, onMounted,reactive, ref,getCurrentInstance } from "vue";
// Props: `modelValue` is the boolean bound to the dialog in the template.
const props = defineProps({
  modelValue: {
    type: Boolean,
    default: false
  }
});
// Component proxy; used below for proxy.$message notifications.
const { proxy } = getCurrentInstance();
// "update:modelValue" requests closing the dialog, "change" delivers the extracted text.
const emits = defineEmits(["update:modelValue", "change"]);
// Currently selected tab; the values are the radio labels themselves.
const active = ref('文件解析')
// File object reported by el-upload for the image tab.
const files = ref({})
// True while the OCR model is loading.
const loading = ref(true)
// True while an uploaded image is being recognized.
const linadingImg = ref(false)
// Object URL of the uploaded image.
const image = ref('')
// Placeholder shown in the image result area while `texts` is empty.
const alertText = ref('请先上传文件...')
// Recognized text lines of the last image.
const texts = ref([])
// Template refs: hidden <img> passed to the OCR model, and the canvas passed to drawBox.
const imageRef = ref()
const canvasRef = ref()
// Width is written by getRecognize; height is not written anywhere in this file.
const textStyle = reactive({
  width: "",
  height: ""
})
// Whether the OCR model finished loading successfully.
const hasLoadingJS = ref(false)
/**
 * On mount: start loading the OCR model and wire the file-parsing tab, which is
 * driven imperatively through DOM ids (#file-input, #file-info, #extract-btn, #result).
 */
onMounted(() => {
  // initPage reports its own failures, so the returned promise is intentionally not awaited.
  initPage();
  nextTick(() => {
    const fileInput = document.getElementById("file-input");
    const fileInfo = document.getElementById("file-info");
    const extractBtn = document.getElementById("extract-btn");
    const resultDiv = document.getElementById("result");
    // NOTE(review): if the dialog body is not rendered yet at mount time these lookups
    // return null; bail out with a warning instead of throwing inside the mounted hook.
    if (!fileInput || !fileInfo || !extractBtn || !resultDiv) {
      console.warn("ExtractionText: file-parsing elements not found, listeners were not attached");
      return;
    }
    let selectedFile = null;

    // Track the selected file and enable/disable the extract button accordingly.
    fileInput.addEventListener("change", (e) => {
      selectedFile = e.target.files.length > 0 ? e.target.files[0] : null;
      if (!selectedFile) {
        // The picker was cancelled: reset the UI and stop here. Reading
        // selectedFile.type past this point would throw on null.
        fileInfo.textContent = "未选择文件";
        extractBtn.disabled = true;
        return;
      }
      const sizeKb = (selectedFile.size / 1024).toFixed(2);
      fileInfo.textContent = `已选择: ${selectedFile.name} (${sizeKb} KB)`;
      extractBtn.disabled = false;
      if (selectedFile.type === "video/mp4") {
        // NOTE(review): upfileOnchange is not defined in this file; presumably a global. Confirm.
        upfileOnchange(selectedFile);
      }
    });

    // Extract text from the selected file, dispatching on its extension.
    extractBtn.addEventListener("click", async () => {
      if (!selectedFile) {
        resultDiv.textContent = "请先选择文件";
        return;
      }
      resultDiv.textContent = "正在处理文件...";
      try {
        let text = "";
        const fileType = selectedFile.name.split(".").pop().toLowerCase();
        if (fileType === "txt") {
          text = await readTextFile(selectedFile);
        } else if (fileType === "pdf") {
          text = await extractTextFromPDF(selectedFile);
        } else if (fileType === "docx") {
          text = await extractTextFromDocx(selectedFile);
        } else if (["mp4", "mp3", "wav"].includes(fileType)) {
          // NOTE(review): start and videoText are not defined in this file; presumably
          // globals provided by a media-transcription script. Confirm.
          await start();
          text = "数据加载有点慢,请稍等。。。。";
          // The interim message above is shown first; the transcript replaces it after 2s.
          setTimeout(() => {
            resultDiv.textContent = videoText;
          }, 2000);
        } else {
          throw new Error("不支持的文件类型");
        }
        resultDiv.textContent = text || "未提取到文本内容";
      } catch (error) {
        resultDiv.textContent = `处理失败: ${error.message}`;
        console.error(error);
      }
    });
  });
});
/**
 * Read a file as text.
 * @param {File} file - File handed to FileReader.readAsText.
 * @returns {Promise<string>} Resolves with the reader result; rejects with
 *   Error("文件读取失败") when the reader reports an error.
 */
function readTextFile(file) {
  return new Promise((onDone, onFail) => {
    const textReader = new FileReader();
    textReader.onerror = () => {
      onFail(new Error("文件读取失败"));
    };
    textReader.onload = (loadEvent) => {
      onDone(loadEvent.target.result);
    };
    textReader.readAsText(file);
  });
}
/**
 * Extract the text of every page of a PDF file.
 * Relies on a `pdfjsLib` global (not imported in this file).
 * @param {File} file - PDF file to read.
 * @returns {Promise<string>} Page texts in page order; the items of a page are joined
 *   with a single space and each page is followed by a blank line ("\n\n").
 *   Rejects with the reader's error event or with the error raised by pdf.js.
 */
async function extractTextFromPDF(file) {
  // Step 1: load the raw bytes through FileReader.
  const rawBuffer = await new Promise((resolve, reject) => {
    const fileReader = new FileReader();
    fileReader.onerror = reject;
    fileReader.onload = function () {
      resolve(this.result);
    };
    fileReader.readAsArrayBuffer(file);
  });

  // Step 2: walk the pages in order (pdf.js pages are 1-based).
  const pdf = await pdfjsLib.getDocument(new Uint8Array(rawBuffer)).promise;
  const pageTexts = [];
  for (let pageNo = 1; pageNo <= pdf.numPages; pageNo += 1) {
    const page = await pdf.getPage(pageNo);
    const { items } = await page.getTextContent();
    const line = items.map((item) => item.str).join(" ");
    pageTexts.push(`${line}\n\n`);
  }
  return pageTexts.join("");
}
/**
 * Extract the raw text of a .docx file.
 * Relies on a `mammoth` global (not imported in this file).
 * @param {File} file - Word document to read.
 * @returns {Promise<string>} The `value` of mammoth.extractRawText's result.
 *   Rejects with the reader's error event, or with any error raised by mammoth,
 *   including synchronous ones, which previously left the promise pending forever.
 */
async function extractTextFromDocx(file) {
  // Only the callback-based FileReader step needs an explicit Promise.
  const arrayBuffer = await new Promise((resolve, reject) => {
    const reader = new FileReader();
    reader.onload = (event) => resolve(event.target.result);
    reader.onerror = reject;
    reader.readAsArrayBuffer(file);
  });
  // Calling mammoth inside the async function turns a synchronous throw into a rejection.
  const result = await mammoth.extractRawText({ arrayBuffer });
  return result.value;
}
// ---------- Image (OCR) parsing ----------
/**
 * Load the OCR model for the image tab and report the outcome to the user.
 * Sets `hasLoadingJS` to the outcome and always clears `loading` when done.
 * A failure is logged to the console as well as shown to the user, so the
 * underlying cause is not lost.
 * @author PengShuai
 * @returns {Promise<void>} Never rejects because of a model-load failure.
 */
const initPage = async () => {
  loading.value = true;
  try {
    await ocr.init(); // model initialisation
    proxy.$message({ type: "success", message: "加载成功" });
    hasLoadingJS.value = true;
  } catch (err) {
    console.error(err);
    proxy.$message({ type: "error", message: "加载失败,请刷新页面" });
    hasLoadingJS.value = false;
  } finally {
    loading.value = false;
  }
};
/**
 * el-upload change handler for the image tab: previews the image and schedules OCR.
 * @author PengShuai
 * @param {object} file - Upload file object; `file.raw` is passed to URL.createObjectURL
 *   and `file.name` is displayed by the template.
 */
const onHandleChange = (file) => {
  // Release the previous preview URL; each createObjectURL call otherwise keeps its blob alive.
  if (image.value) {
    URL.revokeObjectURL(image.value);
  }
  files.value = file;
  image.value = URL.createObjectURL(file.raw);
  // Drop the previous result so the "parsing" placeholder below is actually rendered
  // (the template only shows alertText while texts is empty).
  texts.value = [];
  linadingImg.value = true;
  alertText.value = '图片文件解析中。。。'
  // NOTE(review): the 600 ms delay presumably gives the hidden <img> time to load the
  // new src before recognition starts. Confirm.
  setTimeout(() => {
    getRecognize();
  }, 600);
}
/**
 * Run OCR on the hidden <img> and publish the recognized lines into `texts`.
 * The loading flag is always cleared, even when recognition or drawing throws;
 * in that case the error is logged and `texts` is emptied so the failure hint shows.
 * @returns {Promise<void>}
 */
const getRecognize = async () => {
  const imgEl = imageRef.value;
  const canvasEl = canvasRef.value;
  try {
    const { text, points } = await ocr.recognize(imgEl);
    drawBox(points, imgEl, canvasEl);
    textStyle.width = imgEl.width - 40 + "px";
    // Fall back to an empty list so the template's texts.length check cannot break.
    texts.value = text || [];
  } catch (err) {
    console.error(err);
    texts.value = [];
  } finally {
    linadingImg.value = false;
    // Only rendered when texts is empty, i.e. when nothing was recognized.
    alertText.value = '解析失败,请选择清晰一点的图片重试!'
  }
}
/**
 * Radio-group change handler: loads the OCR model on first entry into the image tab
 * and resets both tabs to their "nothing selected" state.
 * @param {string} val - The newly selected radio label.
 */
const changeRadio = (val) => {
  if (val === '图片解析' && !hasLoadingJS.value) {
    initPage();
  }
  const resultDiv = document.getElementById("result");
  resultDiv.textContent = "请先上传文件...";
  const fileInfo = document.getElementById("file-info");
  // Same placeholder as the initial markup and handleClose; "选择文件" is the button label.
  fileInfo.textContent = "未选择文件";
  files.value = {};
  alertText.value = '请先上传文件...';
  texts.value = [];
}
/**
 * Confirm button handler: emits "change" with the extracted text of the active tab.
 * File tab: `{ text: string }` read from #result. Image tab: `{ text: string[] }` from `texts`.
 */
const onComfirm = () => {
  // `active` is a ref: compare its value. Comparing the ref object itself is always
  // false, which made the file-tab branch unreachable.
  if (active.value === '文件解析') {
    const resultDiv = document.getElementById("result");
    emits("change", { text: resultDiv.textContent });
  } else {
    emits("change", { text: texts.value });
  }
};
/**
 * Close handler (dialog before-close and the cancel button): restores the
 * file-tab placeholders, then asks the parent to hide the dialog.
 */
const handleClose = () => {
  const placeholders = [
    ["result", "请先上传文件"],
    ["file-info", "未选择文件"],
  ];
  for (const [elementId, placeholder] of placeholders) {
    document.getElementById(elementId).textContent = placeholder;
  }
  emits("update:modelValue", false);
};
/** Open the native file picker by clicking the hidden #file-input element. */
const chooseFile = () => {
  const picker = document.getElementById("file-input");
  picker.click();
};
</script>
<style lang="scss" scoped>
/* Dashed drop-zone style frame around the upload controls. */
.container {
  border: 2px dashed #ccc;
  padding: 20px;
  text-align: center;
  margin-bottom: 20px;
}
/* The native input is hidden; the "选择文件" button triggers it from script. */
#file-input {
  display: none;
}
button {
  background: #0072ff;
  color: white;
  padding: 10px 15px;
  border: none;
  cursor: pointer;
  font-size: 16px;
  border-radius: 4px;
}
button:hover {
  background: #0072ff;
}
/* Result area of the file tab; pre-wrap keeps the line breaks of the extracted text. */
#result {
  margin-top: 20px;
  white-space: pre-wrap;
  background: #f9f9f9;
  padding: 15px;
  border: 1px solid #ddd;
  border-radius: 4px;
  height: 270px;
  overflow-y: auto;
}
#file-info {
  margin: 10px 0;
  font-style: italic;
  color: #666;
}
/* :deep() replaces the deprecated ::v-deep combinator for reaching into child components. */
:deep(.el-dialog) {
  margin-top: 10px;
}
.box {
  display: flex;
  .imd {
    flex: 1;
  }
}
/* Result area of the image tab. */
.textModel {
  margin-top: 20px;
  white-space: pre-wrap;
  background: #f9f9f9;
  padding: 15px;
  border: 1px solid #ddd;
  border-radius: 4px;
  height: 195px;
  overflow-y: auto;
}
</style>