Showing posts with label Regex. Show all posts
Showing posts with label Regex. Show all posts

How To Convert HTML to Text, Easily


Whether you want to convert an HTML page into pure text so you can parse out that special piece of information, or you simply want to load a page from the Net into your own word processing package, this mini function could come in handy.
It’s called StripTags and accepts an HTML string. Using a regular expression, it identifies all <tags>, removes them, and returns the modified string. Here’s the code: first the page markup (StripTag.aspx), then its code-behind file (StripTag.aspx.cs):

<%@ Page Language="C#" ValidateRequest="False" AutoEventWireup="true" CodeFile="StripTag.aspx.cs"
 Inherits="StripTag" %>
<%-- Demo page for the StripTags helper implemented in the code-behind class StripTag.
     ValidateRequest="False" turns off ASP.NET request validation for this page, which would
     otherwise reject a post whose text contains markup; the posted text is therefore untrusted. --%>

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head runat="server">
 <title>Untitled Page</title>
</head>
<body>
 <form id="form1" runat="server">
     <div>
         <%-- TextBox1: multi-line input; its Text is read by Button1_Click. --%>
         <asp:TextBox ID="TextBox1" runat="server" Height="172px" Width="363px" TextMode="MultiLine"></asp:TextBox></div>
     <%-- Button1: posts the form back and raises the Button1_Click handler. --%>
     <asp:Button ID="Button1" runat="server" Text="Button" OnClick="Button1_Click" />
     <%-- Label1: Button1_Click writes the result of StripTags here. --%>
     <asp:Label ID="Label1" runat="server" Text="Label"></asp:Label>
 </form>
</body>
</html>


using System;
using System.Data;
using System.Configuration;
using System.Collections;
using System.Web;
using System.Web.Security;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.WebControls.WebParts;
using System.Web.UI.HtmlControls;

/// <summary>
/// Code-behind for the StripTag page. Takes the text typed into TextBox1,
/// removes anything that looks like an HTML tag, and shows the result in Label1.
/// </summary>
public partial class StripTag : System.Web.UI.Page
{
   // Matches "<", then any run of characters other than ">", then ">".
   // Built once and shared; Regex instances are safe to reuse across requests.
   private static readonly System.Text.RegularExpressions.Regex TagPattern =
       new System.Text.RegularExpressions.Regex("<[^>]*>");

   /// <summary>
   /// Page load handler. Intentionally empty: the page has no load-time work.
   /// </summary>
   /// <param name="sender">Source of the event.</param>
   /// <param name="e">Event data (unused).</param>
   protected void Page_Load(object sender, EventArgs e)
   {
   }

   /// <summary>
   /// Removes every substring of the form &lt;...&gt; from the passed HTML.
   /// </summary>
   /// <param name="HTML">Markup to strip; may be null or empty.</param>
   /// <returns>
   /// The input with all matched tags deleted, or an empty string when the
   /// input is null or empty. Character entities (for example &amp;amp;) are
   /// left as they are, and a "&lt;" that is never closed by "&gt;" is not
   /// removed, so the result must still be encoded before being written to a page.
   /// </returns>
   public string StripTags(string HTML)
   {
       // Regex.Replace throws on a null input; treat "nothing in" as "nothing out".
       if (string.IsNullOrEmpty(HTML))
       {
           return string.Empty;
       }

       return TagPattern.Replace(HTML, string.Empty);
   }

   /// <summary>
   /// Click handler for Button1: strips the tags from TextBox1 and displays
   /// the remaining text in Label1.
   /// </summary>
   /// <param name="sender">Source of the event.</param>
   /// <param name="e">Event data (unused).</param>
   protected void Button1_Click(object sender, EventArgs e)
   {
       string plainText = StripTags(TextBox1.Text);

       // Label.Text is emitted as raw HTML and this page accepts unvalidated
       // input, so encode the result: an unterminated tag such as
       // "<img src=x onerror=..." survives StripTags and would otherwise be
       // interpreted by the browser as markup.
       Label1.Text = HttpUtility.HtmlEncode(plainText);
   }
}