We were looking for a way to remove/strip HTML tags from varchar columns. On making search found some code samples, which suggested to create a stored procedure like following.
Click to view Stored Procedure StripHtml
RETURNS VARCHAR (MAX)
AS
BEGIN
DECLARE @Start int
DECLARE @End int
DECLARE @Length int
-- Replace the HTML entity & with the '&' character (this needs to be done first, as
-- '&' might be double encoded as '&')
SET @Start = CHARINDEX ('&', @HTMLText)
SET @End = @Start + 4
SET @Length = (@End - @Start) + 1
WHILE (@Start > 0 AND @End > 0 AND @Length > 0) BEGIN
SET @HTMLText = STUFF (@HTMLText, @Start, @Length, '&')
SET @Start = CHARINDEX ('&', @HTMLText)
SET @End = @Start + 4
SET @Length = (@End - @Start) + 1
END
-- Replace the HTML entity < with the '<' character
SET @Start = CHARINDEX ('<', @HTMLText)
SET @End = @Start + 3
SET @Length = (@End - @Start) + 1
WHILE (@Start > 0 AND @End > 0 AND @Length > 0) BEGIN
SET @HTMLText = STUFF (@HTMLText, @Start, @Length, '<')
SET @Start = CHARINDEX ('<', @HTMLText)
SET @End = @Start + 3
SET @Length = (@End - @Start) + 1
END
-- Replace the HTML entity > with the '>' character
SET @Start = CHARINDEX ('>', @HTMLText)
SET @End = @Start + 3
SET @Length = (@End - @Start) + 1
WHILE (@Start > 0 AND @End > 0 AND @Length > 0) BEGIN
SET @HTMLText = STUFF (@HTMLText, @Start, @Length, '>')
SET @Start = CHARINDEX ('>', @HTMLText)
SET @End = @Start + 3
SET @Length = (@End - @Start) + 1
END
-- Replace the HTML entity & with the '&' character
SET @Start = CHARINDEX ('&', @HTMLText)
SET @End = @Start + 4
SET @Length = (@End - @Start) + 1
WHILE (@Start > 0 AND @End > 0 AND @Length > 0) BEGIN
SET @HTMLText = STUFF (@HTMLText, @Start, @Length, '&')
SET @Start = CHARINDEX ('&', @HTMLText)
SET @End = @Start + 4
SET @Length = (@End - @Start) + 1
END
-- Replace the HTML entity with the ' ' character
SET @Start = CHARINDEX (' ', @HTMLText)
SET @End = @Start + 5
SET @Length = (@End - @Start) + 1
WHILE (@Start > 0 AND @End > 0 AND @Length > 0) BEGIN
SET @HTMLText = STUFF (@HTMLText, @Start, @Length, ' ')
SET @Start = CHARINDEX (' ', @HTMLText)
SET @End = @Start + 5
SET @Length = (@End - @Start) + 1
END
-- Replace any
tags with a newline
SET @Start = CHARINDEX ('<br />', @HTMLText)
SET @End = @Start + 3
SET @Length = (@End - @Start) + 1
WHILE (@Start > 0 AND @End > 0 AND @Length > 0) BEGIN
SET @HTMLText = STUFF (@HTMLText, @Start, @Length, CHAR(13) + CHAR(10))
SET @Start = CHARINDEX ('<br />', @HTMLText)
SET @End = @Start + 3
SET @Length = (@End - @Start) + 1
END
-- Replace any
tags with a newline
SET @Start = CHARINDEX ('<br />', @HTMLText)
SET @End = @Start + 4
SET @Length = (@End - @Start) + 1
WHILE (@Start > 0 AND @End > 0 AND @Length > 0) BEGIN
SET @HTMLText = STUFF (@HTMLText, @Start, @Length, 'CHAR(13) + CHAR(10)')
SET @Start = CHARINDEX ('<br />', @HTMLText)
SET @End = @Start + 4
SET @Length = (@End - @Start) + 1
END
-- Replace any
tags with a newline
SET @Start = CHARINDEX ('<br />', @HTMLText)
SET @End = @Start + 5
SET @Length = (@End - @Start) + 1
WHILE (@Start > 0 AND @End > 0 AND @Length > 0) BEGIN
SET @HTMLText = STUFF (@HTMLText, @Start, @Length, 'CHAR(13) + CHAR(10)')
SET @Start = CHARINDEX ('<br />', @HTMLText)
SET @End = @Start + 5
SET @Length = (@End - @Start) + 1
END
-- Remove anything between tags
SET @Start = CHARINDEX ('<', @HTMLText)
SET @End = CHARINDEX ('>', @HTMLText, CHARINDEX ('<', @HTMLText))
SET @Length = (@End - @Start) + 1
WHILE (@Start > 0 AND @End > 0 AND @Length > 0) BEGIN
SET @HTMLText = STUFF (@HTMLText, @Start, @Length, '')
SET @Start = CHARINDEX ('<', @HTMLText)
SET @End = CHARINDEX ('>', @HTMLText, CHARINDEX ('<', @HTMLText))
SET @Length = (@End - @Start) + 1
END
RETURN LTRIM( RTRIM(@HTMLText))
END
Click to view Stored Procedure StripHtml
CREATE FUNCTION [dbo].[StripHtml] (@HTMLText VARCHAR (MAX))RETURNS VARCHAR (MAX)
AS
BEGIN
DECLARE @Start int
DECLARE @End int
DECLARE @Length int
-- Replace the HTML entity & with the '&' character (this needs to be done first, as
-- '&' might be double encoded as '&')
SET @Start = CHARINDEX ('&', @HTMLText)
SET @End = @Start + 4
SET @Length = (@End - @Start) + 1
WHILE (@Start > 0 AND @End > 0 AND @Length > 0) BEGIN
SET @HTMLText = STUFF (@HTMLText, @Start, @Length, '&')
SET @Start = CHARINDEX ('&', @HTMLText)
SET @End = @Start + 4
SET @Length = (@End - @Start) + 1
END
-- Replace the HTML entity < with the '<' character
SET @Start = CHARINDEX ('<', @HTMLText)
SET @End = @Start + 3
SET @Length = (@End - @Start) + 1
WHILE (@Start > 0 AND @End > 0 AND @Length > 0) BEGIN
SET @HTMLText = STUFF (@HTMLText, @Start, @Length, '<')
SET @Start = CHARINDEX ('<', @HTMLText)
SET @End = @Start + 3
SET @Length = (@End - @Start) + 1
END
-- Replace the HTML entity > with the '>' character
SET @Start = CHARINDEX ('>', @HTMLText)
SET @End = @Start + 3
SET @Length = (@End - @Start) + 1
WHILE (@Start > 0 AND @End > 0 AND @Length > 0) BEGIN
SET @HTMLText = STUFF (@HTMLText, @Start, @Length, '>')
SET @Start = CHARINDEX ('>', @HTMLText)
SET @End = @Start + 3
SET @Length = (@End - @Start) + 1
END
-- Replace the HTML entity & with the '&' character
SET @Start = CHARINDEX ('&', @HTMLText)
SET @End = @Start + 4
SET @Length = (@End - @Start) + 1
WHILE (@Start > 0 AND @End > 0 AND @Length > 0) BEGIN
SET @HTMLText = STUFF (@HTMLText, @Start, @Length, '&')
SET @Start = CHARINDEX ('&', @HTMLText)
SET @End = @Start + 4
SET @Length = (@End - @Start) + 1
END
-- Replace the HTML entity with the ' ' character
SET @Start = CHARINDEX (' ', @HTMLText)
SET @End = @Start + 5
SET @Length = (@End - @Start) + 1
WHILE (@Start > 0 AND @End > 0 AND @Length > 0) BEGIN
SET @HTMLText = STUFF (@HTMLText, @Start, @Length, ' ')
SET @Start = CHARINDEX (' ', @HTMLText)
SET @End = @Start + 5
SET @Length = (@End - @Start) + 1
END
-- Replace any
tags with a newline
SET @Start = CHARINDEX ('<br />', @HTMLText)
SET @End = @Start + 3
SET @Length = (@End - @Start) + 1
WHILE (@Start > 0 AND @End > 0 AND @Length > 0) BEGIN
SET @HTMLText = STUFF (@HTMLText, @Start, @Length, CHAR(13) + CHAR(10))
SET @Start = CHARINDEX ('<br />', @HTMLText)
SET @End = @Start + 3
SET @Length = (@End - @Start) + 1
END
-- Replace any
tags with a newline
SET @Start = CHARINDEX ('<br />', @HTMLText)
SET @End = @Start + 4
SET @Length = (@End - @Start) + 1
WHILE (@Start > 0 AND @End > 0 AND @Length > 0) BEGIN
SET @HTMLText = STUFF (@HTMLText, @Start, @Length, 'CHAR(13) + CHAR(10)')
SET @Start = CHARINDEX ('<br />', @HTMLText)
SET @End = @Start + 4
SET @Length = (@End - @Start) + 1
END
-- Replace any
tags with a newline
SET @Start = CHARINDEX ('<br />', @HTMLText)
SET @End = @Start + 5
SET @Length = (@End - @Start) + 1
WHILE (@Start > 0 AND @End > 0 AND @Length > 0) BEGIN
SET @HTMLText = STUFF (@HTMLText, @Start, @Length, 'CHAR(13) + CHAR(10)')
SET @Start = CHARINDEX ('<br />', @HTMLText)
SET @End = @Start + 5
SET @Length = (@End - @Start) + 1
END
-- Remove anything between
SET @Start = CHARINDEX ('<', @HTMLText)
SET @End = CHARINDEX ('>', @HTMLText, CHARINDEX ('<', @HTMLText))
SET @Length = (@End - @Start) + 1
WHILE (@Start > 0 AND @End > 0 AND @Length > 0) BEGIN
SET @HTMLText = STUFF (@HTMLText, @Start, @Length, '')
SET @Start = CHARINDEX ('<', @HTMLText)
SET @End = CHARINDEX ('>', @HTMLText, CHARINDEX ('<', @HTMLText))
SET @Length = (@End - @Start) + 1
END
RETURN LTRIM( RTRIM(@HTMLText))
END
GO
NB: HTML text must be properly formatted otherwise we will get XML Parsing error.
Hope it Helps.
No comments:
Post a Comment