Files
sgxt_web/src/components/ExtractionText/index.vue
2025-07-17 17:44:24 +08:00

374 lines
11 KiB
Vue
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<template>
<div>
<el-dialog v-model="modelValue" title="文件解析" width="1000px" :show-close="true" :center="true" :close-on-click-modal="false" :before-close="handleClose" >
<div class="flex align-center">
<h3>提取文件类型</h3>
<el-radio-group v-model="active" @change="changeRadio">
<el-radio :label="'文件解析'">文件解析</el-radio>
<el-radio :label="'图片解析'">图片解析</el-radio>
</el-radio-group>
</div>
<!-- 文件解析 -->
<div v-show="active == '文件解析'">
<h1>文件文本提取工具</h1>
<p>上传文件提取文本内容支持 .txt, .pdf, .docx, mp4 , mp3, wav</p>
<div class="container">
<!-- Hidden native file picker (opened by chooseFile). `accept` must be a plain comma-separated list of extensions; quoted tokens are not valid specifiers. -->
<input type="file" id="file-input" accept=".txt,.pdf,.docx,.mp4,.mp3,.wav"/>
<button @click="chooseFile">选择文件</button>
<p id="file-info">未选择文件</p>
</div>
<button id="extract-btn" disabled>提取文本</button>
<h3>提取结果</h3>
<div id="result">请先上传文件...</div>
</div>
<!-- 图片解析 -->
<div v-show="active == '图片解析'" v-loading="loading" element-loading-text="模型加载中......">
<div class="flex align-center just-between">
<h1>文件文本提取工具</h1>
<span title="刷新" class="pointer" >
<el-icon color="#0072ff" size="30px" @click="initPage"><RefreshRight /></el-icon>
<el-icon color="#23c044" size="14px" v-if="hasLoadingJS"><CircleCheckFilled /></el-icon>
<el-icon color="#e60e0e" size="14px" v-if="!hasLoadingJS"><CircleCloseFilled /></el-icon>
</span>
</div>
<p>上传文件提取文本内容支持 .png, .jpg </p>
<div class="container flex" style="height: 248px;">
<div class="mr10">
<el-upload class="upload-demo" action="abc" :auto-upload="false" :on-change="onHandleChange" :show-file-list="false">
<el-button size="medium" type="primary">上传图片</el-button>
</el-upload>
<p id="file-info">{{ files.name || '未选择文件' }} </p>
</div>
<div class="box">
<div class="imd">
<img :src="image" v-if="image" style="width: 340px; max-height: 200px"/>
<img :src="image" ref="imageRef" v-show="false" />
</div>
<div class="imd" v-show="false">
<canvas ref="canvasRef"></canvas>
</div>
</div>
</div>
<h3>提取结果</h3>
<div class="textModel noScollLine" v-loading="linadingImg" element-loading-text="图片解析中......">
<p v-if="texts.length == 0">{{ alertText }}</p>
<template v-else>
<p v-for="(text, index) in texts" :key="index">{{ text }}</p>
</template>
</div>
</div>
<template #footer>
<el-button @click="handleClose">取消</el-button>
<el-button type="primary" @click="onComfirm">确认</el-button>
</template>
</el-dialog>
</div>
</template>
<script setup>
import * as ocr from "@paddlejs-models/ocr";
import { drawBox } from "@/utils/ocrUtils";
import { nextTick, onMounted,reactive, ref,getCurrentInstance } from "vue";
// Component props: `modelValue` is the boolean bound to the dialog's v-model in the template.
const props = defineProps({
modelValue: {
type: Boolean,
default: false
}
});
// Component proxy; used in this file to call `proxy.$message`.
const { proxy } = getCurrentInstance();
// "update:modelValue" is emitted by handleClose; "change" is emitted by onComfirm with the extracted text.
const emits = defineEmits(["update:modelValue", "change"]);
// Currently selected extraction mode (value of the radio group): '文件解析' or '图片解析'.
const active = ref('文件解析')
// Upload-file object handed over by el-upload's on-change; its `name` is shown in the image tab.
const files = ref({})
// True while the OCR model is being initialised (drives v-loading on the image tab).
const loading = ref(true)
// True while an uploaded image is being recognised (drives v-loading on the result box).
const linadingImg = ref(false)
// Object URL of the uploaded image, used as the `src` of both <img> elements.
const image = ref('')
// Placeholder shown in the image result box while `texts` is empty.
const alertText = ref('请先上传文件...')
// Recognised text lines rendered in the image result box.
const texts = ref([])
// Template refs: the hidden <img> passed to the OCR call and the hidden <canvas> passed to drawBox.
const imageRef = ref()
const canvasRef = ref()
// `width` is written by getRecognize; nothing in the visible template reads this object.
const textStyle = reactive({
width: "",
height: ""
})
// Whether the OCR model finished loading successfully (toggles the status icon).
const hasLoadingJS = ref(false)
/**
 * Mount hook: starts loading the OCR model, then (on the next tick, once the
 * dialog markup exists) wires the native file input and the "extract" button
 * of the file-parsing tab.
 *
 * The selected file is kept in a closure variable shared by both listeners.
 */
onMounted(() => {
  initPage();
  nextTick(() => {
    const fileInput = document.getElementById("file-input");
    const fileInfo = document.getElementById("file-info");
    const extractBtn = document.getElementById("extract-btn");
    const resultDiv = document.getElementById("result");
    let selectedFile = null;

    // Track the file chosen in the native picker.
    fileInput.addEventListener("change", (e) => {
      if (e.target.files.length === 0) {
        // Picker was cancelled: reset the UI and stop here. Falling through
        // would dereference `selectedFile` while it is null.
        selectedFile = null;
        fileInfo.textContent = "未选择文件";
        extractBtn.disabled = true;
        return;
      }
      selectedFile = e.target.files[0];
      const sizeKb = (selectedFile.size / 1024).toFixed(2);
      fileInfo.textContent = `已选择: ${selectedFile.name} (${sizeKb} KB)`;
      extractBtn.disabled = false;
      if (selectedFile.type === "video/mp4") {
        // NOTE(review): `upfileOnchange` is not defined in this file as shown;
        // presumably a global provided elsewhere — confirm.
        upfileOnchange(selectedFile);
      }
    });

    // Extract text from the selected file when the button is clicked.
    extractBtn.addEventListener("click", async () => {
      if (!selectedFile) {
        resultDiv.textContent = "请先选择文件";
        return;
      }
      resultDiv.textContent = "正在处理文件...";
      try {
        let text = "";
        // Dispatch on the (lower-cased) file extension.
        const fileType = selectedFile.name.split(".").pop().toLowerCase();
        if (fileType === "txt") {
          text = await readTextFile(selectedFile);
        } else if (fileType === "pdf") {
          text = await extractTextFromPDF(selectedFile);
        } else if (fileType === "docx") {
          text = await extractTextFromDocx(selectedFile);
        } else if (["mp4", "mp3", "wav"].includes(fileType)) {
          // NOTE(review): `start` and `videoText` are not defined in this file
          // as shown; presumably globals provided elsewhere — confirm.
          await start();
          text = "数据加载有点慢,请稍等。。。。";
          // The waiting message is shown first, then replaced after 2 s.
          setTimeout(() => {
            resultDiv.textContent = videoText;
          }, 2000);
        } else {
          throw new Error("不支持的文件类型");
        }
        resultDiv.textContent = text || "未提取到文本内容";
      } catch (error) {
        resultDiv.textContent = `处理失败: ${error.message}`;
        console.error(error);
      }
    });
  });
});
/**
 * Read a file as text.
 *
 * @param {File|Blob} file - File handed to `FileReader.readAsText`.
 * @returns {Promise<string>} Resolves with the reader result; rejects with
 *   an Error ("文件读取失败") when the reader reports an error.
 */
function readTextFile(file) {
  return new Promise((onDone, onFail) => {
    const textReader = new FileReader();
    textReader.onerror = () => {
      onFail(new Error("文件读取失败"));
    };
    textReader.onload = (loadEvent) => {
      onDone(loadEvent.target.result);
    };
    textReader.readAsText(file);
  });
}
/**
 * Extract the text of every page of a PDF file.
 *
 * Reads the file into memory, opens it through the `pdfjsLib` global and
 * joins each page's text items with a space; every page is followed by a
 * blank line ("\n\n").
 *
 * @param {File|Blob} file - PDF file to read.
 * @returns {Promise<string>} Concatenated page text. Rejects with an Error
 *   when the file cannot be read ("文件读取失败") or when pdf.js fails.
 */
async function extractTextFromPDF(file) {
  return new Promise((resolve, reject) => {
    const fileReader = new FileReader();
    fileReader.onload = async () => {
      try {
        const typedArray = new Uint8Array(fileReader.result);
        const pdf = await pdfjsLib.getDocument(typedArray).promise;
        const pageTexts = [];
        // pdf.js pages are numbered from 1.
        for (let pageNo = 1; pageNo <= pdf.numPages; pageNo++) {
          const page = await pdf.getPage(pageNo);
          const textContent = await page.getTextContent();
          pageTexts.push(textContent.items.map((item) => item.str).join(" "));
        }
        resolve(pageTexts.map((pageText) => `${pageText}\n\n`).join(""));
      } catch (error) {
        reject(error);
      }
    };
    // Reject with a real Error (not the raw ProgressEvent) so callers can
    // rely on `error.message`, matching readTextFile.
    fileReader.onerror = () => reject(new Error("文件读取失败"));
    fileReader.readAsArrayBuffer(file);
  });
}
/**
 * Extract the raw text of a Word (.docx) document.
 *
 * Reads the file into an ArrayBuffer and hands it to the `mammoth` global's
 * `extractRawText`.
 *
 * @param {File|Blob} file - .docx file to read.
 * @returns {Promise<string>} The `value` of mammoth's result. Rejects with an
 *   Error when the file cannot be read ("文件读取失败") or when mammoth fails.
 */
async function extractTextFromDocx(file) {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();
    reader.onload = async (event) => {
      // try/await also catches synchronous throws from mammoth, which would
      // otherwise leave this promise pending forever.
      try {
        const result = await mammoth.extractRawText({
          arrayBuffer: event.target.result,
        });
        resolve(result.value);
      } catch (error) {
        reject(error);
      }
    };
    // Reject with a real Error (not the raw ProgressEvent) so callers can
    // rely on `error.message`, matching readTextFile.
    reader.onerror = () => reject(new Error("文件读取失败"));
    reader.readAsArrayBuffer(file);
  });
}
/**
 * Initialise the image-parsing tab by loading the OCR model.
 *
 * Shows the loading mask while `ocr.init()` runs, records the outcome in
 * `hasLoadingJS`, and reports success or failure through `proxy.$message`.
 * The failure is also logged, and the loading mask is always cleared.
 *
 * @author PengShuai
 * @returns {Promise<void>}
 */
const initPage = async () => {
  loading.value = true;
  try {
    await ocr.init(); // model initialisation
    proxy.$message({ type: "success", message: "加载成功" });
    hasLoadingJS.value = true;
  } catch (err) {
    // Keep the underlying cause visible; the toast alone hides it.
    console.error(err);
    hasLoadingJS.value = false;
    proxy.$message({ type: "error", message: "加载失败,请刷新页面" });
  } finally {
    // Always clear the mask, even if showing the message itself throws.
    loading.value = false;
  }
}
/**
 * Image upload handler (el-upload `on-change`).
 *
 * Stores the upload-file object, points the preview at a fresh object URL
 * for the raw file, switches the result box to its "parsing" state and
 * schedules recognition.
 *
 * @author PengShuai
 * @param {object} file - Upload-file object; `file.raw` is passed to
 *   `URL.createObjectURL` and `file.name` is displayed by the template.
 */
const onHandleChange = (file) => {
  files.value = file;
  // Release the previous blob URL; otherwise every upload leaks one.
  if (image.value) {
    URL.revokeObjectURL(image.value);
  }
  image.value = URL.createObjectURL(file.raw);
  linadingImg.value = true;
  alertText.value = '图片文件解析中。。。'
  // Recognition is deferred by 600 ms; presumably to give the hidden <img>
  // time to load the new source — TODO confirm.
  setTimeout(() => {
    getRecognize();
  }, 600);
}
/**
 * Run OCR on the hidden <img> element and publish the recognised lines.
 *
 * On success the detected boxes are drawn onto the hidden canvas and the
 * text goes into `texts`. On failure the error is logged and `texts` is
 * cleared, so the fallback message in `alertText` is displayed. In both
 * cases the "parsing" spinner is switched off.
 *
 * @returns {Promise<void>} Never rejects; OCR errors are handled here.
 */
const getRecognize = async () => {
  const imgEl = imageRef.value;
  const canvasEl = canvasRef.value;
  try {
    const { text, points } = await ocr.recognize(imgEl);
    drawBox(points, imgEl, canvasEl);
    textStyle.width = `${imgEl.width - 40}px`;
    texts.value = text;
  } catch (err) {
    console.error(err);
    // Drop any earlier result so the failure message below is displayed.
    texts.value = [];
  } finally {
    // Without this the spinner would stay on forever after a failed call.
    linadingImg.value = false;
    // Only visible while `texts` is empty.
    alertText.value = '解析失败,请选择清晰一点的图片重试!'
  }
}
/**
 * Radio-group change handler: switches between the two extraction tabs.
 *
 * Retries loading the OCR model when the image tab is opened before the
 * model is ready, then resets the displayed state of both tabs.
 *
 * @param {string} val - Newly selected radio label.
 */
const changeRadio = (val) => {
  if (val === '图片解析' && !hasLoadingJS.value) {
    initPage();
  }
  // File tab: restore the placeholders used by the template.
  const resultDiv = document.getElementById("result");
  resultDiv.textContent = "请先上传文件...";
  const fileInfo = document.getElementById("file-info");
  // Same "no file selected" label as the template and handleClose.
  fileInfo.textContent = "未选择文件";
  // Image tab: clear the upload, preview and recognised text.
  files.value = {}
  alertText.value = '请先上传文件...';
  texts.value = []
  image.value = ''
}
/**
 * Confirm handler: emits "change" with the text extracted on the active tab.
 *
 * File tab: `{ text: string }`, taken from the result box.
 * Image tab: `{ text: string[] }`, the recognised lines.
 */
const onComfirm = () => {
  // `active` is a ref: compare its value, not the ref object (which never
  // equals a string, so the file branch used to be unreachable).
  if (active.value === '文件解析') {
    const resultDiv = document.getElementById("result");
    emits("change", { text: resultDiv.textContent });
  } else {
    emits("change", { text: texts.value });
  }
};
/**
 * Close handler (cancel button and the dialog's before-close hook).
 *
 * Restores the file tab's placeholders and asks the parent to hide the
 * dialog by emitting "update:modelValue" with `false`.
 */
const handleClose = () => {
  const resultDiv = document.getElementById("result");
  // Same placeholder text as the template and changeRadio.
  resultDiv.textContent = "请先上传文件...";
  const fileInfo = document.getElementById("file-info");
  fileInfo.textContent = "未选择文件";
  emits("update:modelValue", false);
};
/**
 * Open the native file picker by clicking the hidden #file-input element.
 */
const chooseFile = () => {
  const hiddenPicker = document.getElementById("file-input");
  hiddenPicker.click();
};
</script>
<style lang="scss" scoped>
.container {
border: 2px dashed #ccc;
padding: 20px;
text-align: center;
margin-bottom: 20px;
}
#file-input {
display: none;
}
button {
background: #0072ff;
color: white;
padding: 10px 15px;
border: none;
cursor: pointer;
font-size: 16px;
border-radius: 4px;
}
button:hover {
background: #0072ff;
}
#result {
margin-top: 20px;
white-space: pre-wrap;
background: #f9f9f9;
padding: 15px;
border: 1px solid #ddd;
border-radius: 4px;
height: 270px;
overflow-y: auto;
}
#file-info {
margin: 10px 0;
font-style: italic;
color: #666;
}
// Reach into the child el-dialog from this scoped style block.
// `:deep()` replaces the `::v-deep` combinator, which is deprecated in Vue 3.
:deep(.el-dialog) {
  margin-top: 10px;
}
.box{
display: flex;
.imd{
flex: 1;
}
}
.textModel{
margin-top: 20px;
white-space: pre-wrap;
background: #f9f9f9;
padding: 15px;
border: 1px solid #ddd;
border-radius: 4px;
height: 195px;
overflow-y: auto;
}
</style>