How to Convert a PDF File to Text in ASP.NET

[]
In this post I will show how to convert a PDF document to a text file using pdftotext. (pdftotext is an open-source command-line utility for converting PDF files to plain text files, i.e. extracting the text data from PDF files. It is freely available and included with many Linux distributions. On Windows it must be installed as part of the xpdf package.) Click here to download pdftotext.

<%@ Page Language="C#" AutoEventWireup="true" CodeFile="pdf2tex.aspx.cs" Inherits="pdf2tex"
   ValidateRequest="False" %>
<%-- Markup for the PDF-to-text page; its code-behind class is pdf2tex (pdf2tex.aspx.cs).
     ValidateRequest="False" turns off ASP.NET request validation for this page.
     NOTE(review): presumably required because txtContent is filled with HTML output
     and posted back with the form; confirm before re-enabling. --%>

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head runat="server">
   <title>Untitled Page</title>
</head>
<body>
   <form id="form1" runat="server">
       <div>
           <%-- File picker for the PDF that should be converted. --%>
           <asp:FileUpload ID="FileUpload1" runat="server" />
           <br />
           <%-- Posts the form back and invokes btnRead_Click in the code-behind. --%>
           <asp:Button ID="btnRead" Text="Convert" runat="Server" OnClick="btnRead_Click" />
           <br />
           <%-- Multi-line box that the code-behind fills with the converter's output. --%>
           <asp:TextBox ID="txtContent" runat="Server" TextMode="MultiLine" Height="376px" Width="411px"></asp:TextBox>
       </div>
   </form>
</body>
</html>
using System;
using System.Data;
using System.Configuration;
using System.Collections;
using System.Web;
using System.Web.Security;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.WebControls.WebParts;
using System.Web.UI.HtmlControls;
using System.IO;

/// <summary>
/// Code-behind for pdf2tex.aspx: converts an uploaded PDF to text by running
/// the external pdftotext.exe tool and shows the result in <c>txtContent</c>.
/// </summary>
public partial class pdf2tex : System.Web.UI.Page
{
    /// <summary>Page load handler; intentionally empty.</summary>
    protected void Page_Load(object sender, EventArgs e)
    {

    }

    /// <summary>
    /// Handles the "Convert" button: saves the uploaded PDF to a unique
    /// temporary file, runs pdftotext.exe on it and displays the generated
    /// output in <c>txtContent</c>.
    /// </summary>
    /// <param name="sender">The button that raised the event.</param>
    /// <param name="e">Event data (unused).</param>
    protected void btnRead_Click(object sender, EventArgs e)
    {
        // Nothing was uploaded: there is nothing to convert.
        if (!FileUpload1.HasFile)
        {
            txtContent.Text = string.Empty;
            return;
        }

        // PostedFile.FileName is the client-supplied name, not a file on the
        // server, and it is untrusted input. Save the upload under a
        // server-generated name instead; unique names per request also keep
        // concurrent conversions from overwriting each other's files.
        string token = Guid.NewGuid().ToString("N");
        string workDir = Path.GetTempPath();
        string pdfPath = Path.Combine(workDir, token + ".pdf");
        string outputPath = Path.Combine(workDir, token + ".htm");

        try
        {
            FileUpload1.SaveAs(pdfPath);

            int exitCode;
            using (System.Diagnostics.Process p = new System.Diagnostics.Process())
            {
                p.StartInfo.FileName = Page.MapPath("pdftotext.exe");
                // Both paths are quoted so directories containing spaces are
                // passed as single arguments. No user-controlled text reaches
                // the command line.
                p.StartInfo.Arguments =
                    "-raw -htmlmeta \"" + pdfPath + "\" \"" + outputPath + "\"";
                p.StartInfo.UseShellExecute = false;
                // A server-side helper process should not open a console window.
                p.StartInfo.CreateNoWindow = true;
                p.StartInfo.RedirectStandardOutput = false;
                p.Start();
                // WaitForExit blocks until the tool has finished, so no extra
                // sleep is needed before reading the output file.
                p.WaitForExit();
                exitCode = p.ExitCode;
            }

            if (exitCode != 0 || !File.Exists(outputPath))
            {
                txtContent.Text = "Conversion failed (pdftotext exit code " + exitCode + ").";
                return;
            }

            txtContent.Text = ReadFile(outputPath);
        }
        finally
        {
            // Always remove the temporary files, even when conversion fails.
            DeleteQuietly(pdfPath);
            DeleteQuietly(outputPath);
        }
    }

    /// <summary>Reads the entire contents of a text file.</summary>
    /// <param name="s">Path of the file to read.</param>
    /// <returns>The complete text of the file.</returns>
    public string ReadFile(string s)
    {
        // Dispose the reader so the file handle is released immediately.
        using (StreamReader sr = new StreamReader(s))
        {
            return sr.ReadToEnd();
        }
    }

    /// <summary>Best-effort deletion of a temporary file.</summary>
    private static void DeleteQuietly(string path)
    {
        try
        {
            if (File.Exists(path))
            {
                File.Delete(path);
            }
        }
        catch (IOException)
        {
            // Cleanup is best effort; a locked temp file must not fail the request.
        }
        catch (UnauthorizedAccessException)
        {
            // Same as above: leave the file for the OS temp cleanup.
        }
    }
}

Post a Comment

Please do not post any spam link in the comment box😊

Previous Post Next Post

Blog ads

CodeGuru