The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/scripts/cleanfile

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 #!/usr/bin/perl -w
    2 #
    3 # Clean one or more text files of stealth whitespace (anything that is not a plain file is skipped).
    4 # WARNING: this can be a highly destructive operation.  Use with caution.
    5 #
    6 
    7 use bytes;
    8 use File::Basename;
    9 
   # Default options
   #
   # $max_width is the widest visual line (in columns, tabs expanded to
   # 8-column stops) accepted without a warning.  A value of 0 disables the
   # width check; the -width / -w command-line option overrides this default.
   $max_width = 79;
   12 
   # Clean up space-tab sequences, either by removing spaces or
   # replacing them with tabs.
   #
   # Takes one string and returns the cleaned copy.  A run of spaces that is
   # immediately followed by a tab is replaced by the number of tabs needed to
   # reach the same 8-column tab stop; every other run of spaces (including
   # one at the end of the string) is passed through unchanged.  "\n" and "\r"
   # reset the column counter.
   sub clean_space_tabs($)
   {
       no bytes;                   # Tab alignment depends on characters

       my ($text) = @_;
       my $out     = '';
       my $col     = 0;            # column reached by what has been emitted
       my $pending = 0;            # spaces seen but not yet emitted

       for my $ch (split //, $text) {
           if ($ch eq "\t") {
               # The tab lands on the first stop past the pending spaces;
               # emit one tab per stop crossed and drop the spaces.
               my $stop = ($col + $pending + 8) & ~7;
               $out .= "\t" x (($stop >> 3) - ($col >> 3));
               $col     = $stop;
               $pending = 0;
           } elsif ($ch eq ' ') {
               # Hold the space back until we know whether a tab follows.
               $pending++;
           } else {
               # Anything else makes the pending spaces significant.
               $out .= ' ' x $pending;
               $out .= $ch;
               $col = ($ch eq "\n" || $ch eq "\r") ? 0 : $col + $pending + 1;
               $pending = 0;
           }
       }

       # Spaces at the very end were never followed by a tab: keep them.
       return $out . (' ' x $pending);
   }
   52 
   # Compute the visual width of a string
   #
   # Takes one string and returns the width, in columns, of its widest line.
   # Tabs advance to the next 8-column stop.  "\n" and "\r" both end a line
   # and reset the column to 0 (the same rule clean_space_tabs applies), so
   # neither contributes to the width.
   sub strwidth($) {
       no bytes;                   # Tab alignment depends on characters

       my ($text) = @_;
       my $col    = 0;             # column on the current line
       my $widest = 0;             # widest completed line so far

       for my $ch (split //, $text) {
           if ($ch eq "\t") {
               $col = ($col + 8) & ~7;
           } elsif ($ch eq "\n" || $ch eq "\r") {
               $widest = $col if $col > $widest;
               $col = 0;
           } else {
               $col++;
           }
       }

       # The last line may not be terminated; account for it too.
       return $col > $widest ? $col : $widest;
   }
   77 
   # Command-line parsing.
   #
   # Sets $name (program name for messages), @files (every argument that does
   # not start with '-') and, when -width / -w is given, $max_width.  The
   # width must be a non-negative integer; 0 disables the width check.  Any
   # other option, or a missing/invalid width, prints the usage line and
   # exits with status 1.
   $name = basename($0);

   @files = ();

   while (defined(my $arg = shift(@ARGV))) {
       if ($arg !~ /^-/) {
           push(@files, $arg);
       } elsif (($arg eq '-width' || $arg eq '-w')
                && defined($ARGV[0]) && $ARGV[0] =~ /\A\d+\z/) {
           # Only consume the value once it is known to be a valid number.
           $max_width = shift(@ARGV) + 0;
       } else {
           print STDERR "Usage: $name [-width #] files...\n";
           exit 1;
       }
   }
   94 
   # Clean every file named on the command line, in place.
   #
   # For each file: skip it unless it is a plain, non-binary file that can be
   # opened read/write; strip trailing spaces, tabs and CRs from every line;
   # normalise space-tab sequences with clean_space_tabs(); drop blank lines
   # at the end of the file; and warn on STDERR about any line wider than
   # $max_width (when that is non-zero).  The file is rewritten only if its
   # content actually changed.  A failure while rewriting is fatal, because
   # the file may be left partially written.
   foreach my $f (@files) {
       print STDERR "$name: $f\n";

       if (! -f $f) {
           print STDERR "$f: not a file\n";
           next;
       }

       # A lexical handle is closed automatically whenever this iteration
       # ends, including through the early "next" statements below.
       my $fh;
       if (!open($fh, '+<', $f)) {
           print STDERR "$name: Cannot open file: $f: $!\n";
           next;
       }

       binmode $fh;

       # First, verify that it is not a binary file; consider any file
       # with a zero byte to be a binary file.  Is there any better, or
       # additional, heuristic that should be applied?
       my $is_binary = 0;
       my $data;

       while (read($fh, $data, 65536) > 0) {
           if (index($data, "\0") >= 0) {
               $is_binary = 1;
               last;
           }
       }

       if ($is_binary) {
           print STDERR "$name: $f: binary file\n";
           next;
       }

       # Nothing has been modified yet, so a failed rewind only skips the file.
       if (!seek($fh, 0, 0)) {
           print STDERR "$name: Cannot rewind file: $f: $!\n";
           next;
       }

       my @blanks  = ();           # blank lines not yet known to be interior
       my @lines   = ();           # cleaned content to write back
       my $lineno  = 0;
       my $changed = 0;            # true once any cleaned line differs

       while (defined(my $raw = <$fh>)) {
           $lineno++;

           (my $line = $raw) =~ s/[ \t\r]*$//;     # Remove trailing spaces
           $line = clean_space_tabs($line);

           # Compare content, not length: a rewrite such as
           # "1234567 \t" -> "1234567\t\t" keeps the byte count unchanged.
           $changed = 1 if $line ne $raw;

           if ($line eq "\n") {
               push(@blanks, $line);
           } elsif ($line ne '') {
               push(@lines, @blanks, $line);
               @blanks = ();
           }
           # An empty $line is a whitespace-only last line without a
           # newline; it is dropped like any other trailing blank.

           my $l_width = strwidth($line);
           if ($max_width && $l_width > $max_width) {
               print STDERR
                   "$f:$lineno: line exceeds $max_width characters ($l_width)\n";
           }
       }

       # Any blanks at the end of the file are discarded
       $changed = 1 if @blanks;

       if (!$changed) {
           close($fh);
           next;
       }

       # Only write to the file if changed
       seek($fh, 0, 0)
           or die "$name: Failed to rewind modified file: $f: $!\n";
       print {$fh} @lines
           or die "$name: Failed to write modified file: $f: $!\n";

       # tell() reports failure as -1, not undef.
       my $where = tell($fh);
       if ($where < 0 || !truncate($fh, $where)) {
           die "$name: Failed to truncate modified file: $f: $!\n";
       }

       # Buffered write errors only surface when the handle is closed.
       close($fh)
           or die "$name: Failed to write modified file: $f: $!\n";
   }

Cache object: 29c286fc8f8e050594bc4d16bbaa93cf


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.