sgxt_web/src/components/ExtractionText/index.vue

<template>
  <div>
    <!-- modelValue is a prop, so it is bound one-way; every close path runs handleClose, which emits update:modelValue to the parent. -->
    <el-dialog :model-value="modelValue" title="文件解析" width="1000px" :show-close="true" :center="true" :close-on-click-modal="false" :before-close="handleClose" >
      <!-- Mode switch between the two panels below -->
      <div class="flex align-center">
        <h3>提取文件类型：</h3>
        <el-radio-group v-model="active" @change="changeRadio">
          <el-radio :label="'文件解析'">文件解析</el-radio>
          <el-radio :label="'图片解析'">图片解析</el-radio>
        </el-radio-group>
      </div>
      <!-- File parsing panel: its controls are wired with plain DOM listeners in the script's onMounted hook -->
      <div v-show="active == '文件解析'">
        <h1>文件文本提取工具</h1>
        <p>上传文件提取文本内容（支持 .txt, .pdf, .docx, mp4 , mp3, wav）</p>

        <div class="container">
          <!-- accept takes a plain comma-separated list; quoted extensions are not matched by the file picker -->
          <input type="file" id="file-input" accept=".txt,.pdf,.docx,.mp4,.mp3,.wav"/>
          <button type="button" @click="chooseFile">选择文件</button>
          <p id="file-info">未选择文件</p>
        </div>

        <button type="button" id="extract-btn" disabled>提取文本</button>

        <h3>提取结果：</h3>
        <div id="result">请先上传文件...</div>
      </div>
      <!-- Image parsing (OCR) panel -->
      <div v-show="active == '图片解析'" v-loading="loading" element-loading-text="模型加载中......">
        <div class="flex align-center just-between">
          <h1>文件文本提取工具</h1>
          <!-- Reload the OCR model; the small icon shows whether the model is currently loaded -->
          <span title="刷新" class="pointer"  >
            <el-icon color="#0072ff" size="30px" @click="initPage"><RefreshRight /></el-icon>
            <el-icon color="#23c044" size="14px" v-if="hasLoadingJS"><CircleCheckFilled /></el-icon>
            <el-icon color="#e60e0e" size="14px" v-if="!hasLoadingJS"><CircleCloseFilled /></el-icon>
          </span>
        </div>
        <p>上传文件提取文本内容（支持 .png, .jpg ）</p>
        <div class="container flex" style="height: 248px;">
          <div class="mr10">
            <el-upload class="upload-demo" action="abc" :auto-upload="false" :on-change="onHandleChange" :show-file-list="false">
              <el-button size="medium" type="primary">上传图片</el-button>
            </el-upload>
            <!-- NOTE(review): this id duplicates the one in the file panel; it is kept because the scoped #file-info style and the script's getElementById lookup (which resolves to the first match) rely on the current markup -->
            <p id="file-info">{{ files.name || '未选择文件' }} </p>
          </div>
          <div class="box">
            <div class="imd">
              <!-- Visible preview -->
              <img :src="image" v-if="image" alt="待解析图片预览" style="width: 340px; max-height: 200px"/>
              <!-- Hidden full-size copy that is handed to the OCR call -->
              <img :src="image" ref="imageRef" alt="" v-show="false" />
            </div>
            <!-- Hidden canvas passed to drawBox together with the OCR points -->
            <div class="imd" v-show="false">
              <canvas ref="canvasRef"></canvas>
            </div>
          </div>
        </div>
        <h3>提取结果：</h3>
        <div class="textModel noScollLine" v-loading="linadingImg" element-loading-text="图片解析中......">
          <p v-if="texts.length == 0">{{ alertText }}</p>
          <template v-else>
            <p v-for="(text, index) in texts" :key="index">{{ text }}</p>
          </template>
        </div>
      </div>
      <template #footer>
        <el-button @click="handleClose">取消</el-button>
        <el-button type="primary" @click="onComfirm">确认</el-button>
      </template>
    </el-dialog>
  </div>
</template>

<script setup>
import * as ocr from "@paddlejs-models/ocr";
import { drawBox } from "@/utils/ocrUtils";
import { nextTick, onMounted,reactive, ref,getCurrentInstance } from "vue";
// Dialog visibility, controlled by the parent (v-model contract: modelValue + update:modelValue).
const props = defineProps({
  modelValue: {
    type: Boolean,
    default: false
  }
});
// Component proxy, used below to reach the global $message helper.
const { proxy } = getCurrentInstance();
// "update:modelValue" is emitted when closing; "change" carries the extracted text on confirm.
const emits = defineEmits(["update:modelValue", "change"]);
// Currently selected panel: '文件解析' (file parsing) or '图片解析' (image parsing).
const active = ref('文件解析')

// Last file object received from el-upload; its name is shown in the image panel.
const files = ref({})
// Drives v-loading on the image panel while the OCR model is being initialised.
const loading = ref(true)
// Drives v-loading on the image result area while a picture is being recognised.
const linadingImg = ref(false)
// Object URL of the uploaded picture, bound to both <img> elements.
const image = ref('')
// Placeholder/status line shown in the image result area when `texts` is empty.
const alertText = ref('请先上传文件...')
// Recognised text for the current picture, rendered one <p> per entry.
const texts = ref([])
// Template ref of the hidden <img> handed to the OCR call.
const imageRef = ref()
// Template ref of the hidden <canvas> handed to drawBox.
const canvasRef = ref()
// Only `width` is written (in getRecognize); no binding to it is visible in this file's template.
const textStyle = reactive({
  width: "",
  height: ""
})
// True once ocr.init() has succeeded; selects the green/red status icon.
const hasLoadingJS = ref(false)

// On mount: start loading the OCR model, then wire the plain-DOM controls of the "文件解析" panel.
onMounted(() => {
  initPage();
  nextTick(() => {
    const fileInput = document.getElementById("file-input");
    const fileInfo = document.getElementById("file-info");
    const extractBtn = document.getElementById("extract-btn");
    const resultDiv = document.getElementById("result");
    // The controls live inside the dialog body. If they are not in the DOM at this
    // point there is nothing to bind to, so report it instead of throwing on null.
    if (!fileInput || !fileInfo || !extractBtn || !resultDiv) {
      console.warn("ExtractionText: file-parsing controls not found in the DOM; listeners were not attached");
      return;
    }
    // File currently chosen in the hidden <input type="file">; null when none.
    let selectedFile = null;

    // Keep the info line and the extract button in sync with the chosen file.
    fileInput.addEventListener("change", function (e) {
      const picked = e.target.files[0];
      if (!picked) {
        // Selection was cleared/cancelled: reset and stop here. The original fell
        // through and read `.type` of null, which threw a TypeError.
        selectedFile = null;
        fileInfo.textContent = "未选择文件";
        extractBtn.disabled = true;
        return;
      }
      selectedFile = picked;
      fileInfo.textContent = `已选择: ${picked.name} (${(picked.size / 1024).toFixed(2)} KB)`;
      extractBtn.disabled = false;
      if (picked.type == "video/mp4") {
        // NOTE(review): upfileOnchange is not defined in this file; presumably a
        // global provided by a separately loaded script — confirm.
        upfileOnchange(picked);
      }
    });

    // Extract text from the chosen file, dispatching on its extension.
    extractBtn.addEventListener("click", async function () {
      if (!selectedFile) {
        resultDiv.textContent = "请先选择文件";
        return;
      }
      resultDiv.textContent = "正在处理文件...";
      try {
        let text = "";
        const fileType = selectedFile.name.split(".").pop().toLowerCase();
        if (fileType === "txt") {
          text = await readTextFile(selectedFile);
        } else if (fileType === "pdf") {
          text = await extractTextFromPDF(selectedFile);
        } else if (fileType === "docx") {
          text = await extractTextFromDocx(selectedFile);
        } else if (["mp4", "mp3", "wav"].includes(fileType)) {
          // NOTE(review): start and videoText are not defined in this file; presumably
          // globals from the media transcription script — confirm.
          await start();
          text = "数据加载有点慢，请稍等。。。。";
          // Show the waiting message now and replace it with videoText after 2 s.
          setTimeout(() => {
            resultDiv.textContent = videoText;
          }, 2000);
        } else {
          throw new Error("不支持的文件类型");
        }
        resultDiv.textContent = text || "未提取到文本内容";
      } catch (error) {
        resultDiv.textContent = `处理失败: ${error.message}`;
        console.error(error);
      }
    });
  });
});

/**
 * Read a file as text.
 * @param {Blob} file - File to read.
 * @returns {Promise<string>} Resolves with the file content; rejects with an
 *   Error("文件读取失败") when the reader reports an error.
 */
function readTextFile(file) {
  return new Promise((resolve, reject) => {
    const textReader = new FileReader();
    textReader.onload = (loadEvent) => {
      resolve(loadEvent.target.result);
    };
    textReader.onerror = () => {
      reject(new Error("文件读取失败"));
    };
    textReader.readAsText(file);
  });
}

/**
 * Extract the text of every page of a PDF file.
 * Relies on the global `pdfjsLib` (not imported in this file; presumably loaded
 * by a separate script tag — confirm).
 * @param {Blob} file - PDF file to read.
 * @returns {Promise<string>} Page texts, each followed by a blank line. Items
 *   within a page are joined with a single space.
 *   Rejects with an Error when the file cannot be read or parsed.
 */
async function extractTextFromPDF(file) {
  return new Promise((resolve, reject) => {
    const fileReader = new FileReader();

    fileReader.onload = async () => {
      try {
        const typedArray = new Uint8Array(fileReader.result);
        const pdf = await pdfjsLib.getDocument(typedArray).promise;
        let fullText = "";

        // pdf.js pages are 1-based.
        for (let pageNo = 1; pageNo <= pdf.numPages; pageNo++) {
          const page = await pdf.getPage(pageNo);
          const textContent = await page.getTextContent();
          const pageText = textContent.items.map((item) => item.str).join(" ");
          fullText += pageText + "\n\n";
        }

        resolve(fullText);
      } catch (error) {
        reject(error);
      }
    };

    // Reject with a real Error (same message as readTextFile) rather than the raw
    // ProgressEvent, so callers that print error.message get a meaningful text.
    fileReader.onerror = () => reject(new Error("文件读取失败"));
    fileReader.readAsArrayBuffer(file);
  });
}

/**
 * Extract the raw text of a .docx file.
 * Relies on the global `mammoth` (not imported in this file; presumably loaded
 * by a separate script tag — confirm).
 * @param {Blob} file - Word document to read.
 * @returns {Promise<string>} The `value` returned by mammoth.extractRawText.
 *   Rejects with an Error when the file cannot be read or converted.
 */
async function extractTextFromDocx(file) {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();

    reader.onload = async (event) => {
      // try/await covers both a rejected conversion and a synchronous throw
      // (e.g. mammoth missing), which previously left this promise pending forever.
      try {
        const arrayBuffer = event.target.result;
        const result = await mammoth.extractRawText({ arrayBuffer: arrayBuffer });
        resolve(result.value);
      } catch (error) {
        reject(error);
      }
    };

    // Reject with a real Error (same message as readTextFile) rather than the raw
    // ProgressEvent, so callers that print error.message get a meaningful text.
    reader.onerror = () => reject(new Error("文件读取失败"));
    reader.readAsArrayBuffer(file);
  });
}

/**
 * Initialise the OCR model for the image panel.
 * Shows the panel's loading mask while ocr.init() runs, reports the outcome
 * through $message and records it in hasLoadingJS.
 * @author PengShuai
 * @returns {Promise<void>} Never rejects; failures are reported to the user and logged.
 */
const initPage = async () => {
  loading.value = true;
  try {
    await ocr.init(); // model initialisation
    hasLoadingJS.value = true;
    proxy.$message({ type: "success",  message: "加载成功" });
  } catch (err) {
    // Keep the cause visible for debugging; the toast alone hides why loading failed.
    console.error(err);
    hasLoadingJS.value = false;
    proxy.$message({ type: "error",  message: "加载失败，请刷新页面" });
  } finally {
    loading.value = false;
  }
}
 /**
 *@Descripttion:图片上传事件
  *@Author: PengShuai
  *@Date: 2023-12-21 10:49:36
  */
const onHandleChange = (file) => {
  files.value = file;
  image.value = URL.createObjectURL(file.raw);
  linadingImg.value = true;
  alertText.value = '图片文件解析中。。。'
  setTimeout(() => {
    getRecognize();
  }, 600);
}
/**
 * Run OCR on the hidden <img> and publish the recognised text.
 * Always clears the result-area spinner, even when recognition throws.
 * @returns {Promise<void>} Never rejects; failures are logged and shown as the
 *   empty-result message.
 */
const getRecognize = async () => {
  // Named imgEl so the outer `image` ref is not shadowed.
  const imgEl = imageRef.value;
  const canvasEl = canvasRef.value;
  try {
    const { text, points } = await ocr.recognize(imgEl);
    drawBox(points, imgEl, canvasEl);
    textStyle.width = imgEl.width - 40 + "px";
    // The template reads texts.length, so never store undefined/null here.
    texts.value = text || [];
  } catch (err) {
    console.error(err);
    texts.value = [];
  } finally {
    linadingImg.value = false;
    // Shown by the template only while `texts` is empty, i.e. when nothing was recognised.
    alertText.value = '解析失败,请选择清晰一点的图片重试！'
  }
}

/**
 * Radio-group change handler: reset both panels when the mode is switched.
 * @param {string} val - Newly selected mode ('文件解析' or '图片解析').
 */
const changeRadio = (val) =>{
  // Load the OCR model on demand if the load attempted on mount did not succeed.
  if(val == '图片解析' && !hasLoadingJS.value) {
    initPage()
  }
  // File panel: restore the same placeholders the template starts with.
  const resultDiv = document.getElementById("result");
  resultDiv.textContent = "请先上传文件...";
  const fileInfo = document.getElementById("file-info");
  // Was "选择文件"; every other place uses "未选择文件" for the empty state.
  fileInfo.textContent = "未选择文件";
  // Image panel: clear the file name, status line and recognised text.
  files.value = {}
  alertText.value = '请先上传文件...';
  texts.value = []
}
/**
 * Confirm button handler: emit "change" with the text of the active panel.
 * File panel emits `{ text: string }` taken from the #result element;
 * image panel emits `{ text: texts.value }` (the list of recognised entries).
 */
const onComfirm = () => {
  // `active` is a ref: compare its value. Comparing the ref object itself was
  // always false, so the file branch could never run.
  if(active.value == '文件解析'){
    const resultDiv = document.getElementById("result");
    emits("change", { text: resultDiv.textContent });
  }else{
    emits("change", {text:texts.value});
  }
};

/**
 * Close handler (dialog before-close hook and the cancel button): reset the file
 * panel's placeholders and ask the parent to hide the dialog.
 */
const handleClose = () => {
  const resultDiv = document.getElementById("result");
  // Same placeholder as the template's initial text and changeRadio (was missing the "...").
  resultDiv.textContent = "请先上传文件...";
  const fileInfo = document.getElementById("file-info");
  fileInfo.textContent = "未选择文件";
  emits("update:modelValue", false);
};

// Open the native file picker by forwarding the click to the #file-input element,
// which the scoped style keeps hidden.
const chooseFile = () => {
  const pickerInput = document.getElementById("file-input");
  pickerInput.click();
};
</script>

<style lang="scss" scoped>
/* Dashed drop-zone style frame around the upload controls */
.container {
  border: 2px dashed #ccc;
  padding: 20px;
  text-align: center;
  margin-bottom: 20px;
}

/* The native file input is hidden; the "选择文件" button forwards clicks to it */
#file-input {
  display: none;
}

button {
  background: #0072ff;
  color: white;
  padding: 10px 15px;
  border: none;
  cursor: pointer;
  font-size: 16px;
  border-radius: 4px;
}

/* NOTE(review): same colour as the base rule, so hovering shows no visual change */
button:hover {
  background: #0072ff;
}

/* Result area of the file panel */
#result {
  margin-top: 20px;
  white-space: pre-wrap;
  background: #f9f9f9;
  padding: 15px;
  border: 1px solid #ddd;
  border-radius: 4px;
  height: 270px;
  overflow-y: auto;
}

#file-info {
  margin: 10px 0;
  font-style: italic;
  color: #666;
}

/* :deep() replaces the deprecated ::v-deep combinator for styling child-component DOM */
:deep(.el-dialog) {
  margin-top: 10px;
}


.box{
  display: flex;
  .imd{
    flex: 1;
  }
}

/* Result area of the image panel */
.textModel{
  margin-top: 20px;
  white-space: pre-wrap;
  background: #f9f9f9;
  padding: 15px;
  border: 1px solid #ddd;
  border-radius: 4px;
  height: 195px;
  overflow-y: auto;
}
</style>