发布网友 发布时间:2022-04-19 18:18
共4个回答
热心网友 时间:2023-02-03 08:31
添加spire.pdf.dll为引用,使用下面的代码即可提取pdf中所有表格数据到txt文档:
using System.IO;
using System.Text;
using Spire.Pdf;
using Spire.Pdf.Utilities;
namespace ExtractPdfTable
{
    /// <summary>
    /// Console entry point that extracts the text of every table found in a PDF
    /// document and writes it to a plain-text file.
    /// </summary>
    class Program
    {
        // Defaults preserve the behavior of the original sample when no
        // command-line arguments are supplied.
        private const string DefaultInputPath = @"C:\Users\Administrator\Desktop\Table.pdf";
        private const string DefaultOutputPath = "Table.txt";

        /// <summary>
        /// Loads a PDF, walks every page, and dumps each extracted table as
        /// space-separated cells, one table row per line.
        /// </summary>
        /// <param name="args">
        /// Optional arguments: <c>args[0]</c> is the input PDF path and
        /// <c>args[1]</c> is the output text path. Missing arguments fall back
        /// to <see cref="DefaultInputPath"/> and <see cref="DefaultOutputPath"/>.
        /// </param>
        static void Main(string[] args)
        {
            string inputPath = (args != null && args.Length > 0) ? args[0] : DefaultInputPath;
            string outputPath = (args != null && args.Length > 1) ? args[1] : DefaultOutputPath;

            // Accumulates the text of all tables before a single write to disk.
            StringBuilder builder = new StringBuilder();

            PdfDocument doc = new PdfDocument();
            try
            {
                // Load the PDF file
                doc.LoadFromFile(inputPath);

                // Initialize an instance of PdfTableExtractor class
                PdfTableExtractor extractor = new PdfTableExtractor(doc);

                // Running 1-based counter across all pages, used in the "Table N" header.
                int tableNum = 1;

                // Loop through the pages
                for (int pageIndex = 0; pageIndex < doc.Pages.Count; pageIndex++)
                {
                    // Extract tables from a specific page
                    PdfTable[] tableList = extractor.ExtractTable(pageIndex);

                    // The original sample guarded against a null result, so keep
                    // that guard; an empty array simply yields no iterations below.
                    if (tableList == null)
                    {
                        continue;
                    }

                    foreach (PdfTable table in tableList)
                    {
                        builder.Append("Table " + tableNum);
                        builder.Append("\r\n");

                        // Get row number and column number of the current table
                        int rowCount = table.GetRowCount();
                        int columnCount = table.GetColumnCount();

                        // Loop through the rows and columns
                        for (int i = 0; i < rowCount; i++)
                        {
                            for (int j = 0; j < columnCount; j++)
                            {
                                // Each cell is followed by a single space, including
                                // the last cell of a row (matches the original output).
                                builder.Append(table.GetText(i, j));
                                builder.Append(" ");
                            }
                            builder.Append("\r\n");
                        }

                        // Blank line between tables
                        builder.Append("\r\n");
                        tableNum += 1;
                    }
                }
            }
            finally
            {
                // Release the document even if loading or extraction throws;
                // the original sample never closed it.
                doc.Close();
            }

            // Write to a .txt file
            File.WriteAllText(outputPath, builder.ToString());
        }
    }
}
热心网友 时间:2023-02-03 09:49
直接用xunjiePDF编辑器就可以提取热心网友 时间:2023-02-03 11:23
只能提取文件的页面,是编辑PDF文件表格,办公有迅捷PDF编辑器就可以了,在最近文件那里去操作,具体过程就不说了,自己去试试吧。热心网友 时间:2023-02-03 13:15
有的私信我