Information Technology Grimoire

Version .0.0.1

IT notes from various projects, written down because I forget things; hopefully they help you too.

strip HTML

Strip all HTML

#!/usr/bin/perl
# Strip HTML from the text pasted into the __DATA__ section of this file.
# Each input line is whitespace-normalized, passed through HTML::Restrict,
# and printed to STDOUT followed by a newline.
use strict;
use warnings;

use HTML::Restrict;

# Whitelist of tags that survive processing. Each tag maps to an empty
# attribute list, so the tag itself is kept but none of its attributes are.
my $hr = HTML::Restrict->new();
$hr->set_rules({
    # allowed
    p  => [],
    li => [],
    ul => [],
    h4 => [],
    h3 => [],
    h2 => [],

    # not allowed (everything by default is not allowed!)
    # img => [qw( alt / )],
    # h1  => [],
});

# Read one line at a time instead of pulling the whole DATA section into a
# list first, so memory use does not grow with the size of the pasted input.
while ( my $line = <DATA> ) {
    $line =~ s{&nbsp;}{ }g;    # turn non-breaking-space entities into plain spaces
    $line =~ s{\s+}{ }g;       # collapse each whitespace run (tabs, newline, ...) to one space
    $line =~ s{\A\s+}{};       # trim leading spaces
    $line =~ s{\s+\z}{};       # trim trailing spaces

    my $clean = $hr->process($line);

    # NOTE(review): process() may hand back undef for input that yields no
    # text at all - confirm against the installed HTML::Restrict version.
    # Printing an empty string keeps one output line per input line and
    # avoids an "uninitialized value" warning.
    $clean = q{} if !defined $clean;

    print $clean . "\n";
}
__DATA__
Paste your code here below this line