Blame view
scripts/cleanfile
3.41 KB
12b315603 cleanfile: a scri... |
1 2 3 4 5 6 7 8 |
#!/usr/bin/perl -w # # Clean a text file -- or directory of text files -- of stealth whitespace. # WARNING: this can be a highly destructive operation. Use with caution. # use bytes; use File::Basename; |
cb3ed5b7e scripts: Make cle... |
9 10 |
# Default options
#
# Maximum visual line width, in columns, before a line is reported as
# too long.  A value of 0 disables the report.
$max_width = 79;
12b315603 cleanfile: a scri... |
11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
# Clean up space-tab sequences, either by removing spaces or
# replacing them with tabs.
#
# Takes one string (which may contain embedded line terminators) and
# returns a cleaned copy.  A run of spaces that is immediately followed
# by a tab is dropped and replaced by as many tabs as are needed to reach
# the tab stop the original sequence ended on (tab stops are every 8
# columns).  Spaces that are not followed by a tab are copied through
# unchanged, as is every other character.
sub clean_space_tabs($)
{
    no bytes;                   # Tab alignment depends on characters

    my($li) = @_;
    my $lo  = '';               # Output built so far
    my $pos = 0;                # Column of the next character to emit
    my $nsp = 0;                # Spaces seen but not yet emitted

    for my $c (split(//, $li)) {
        if ($c eq "\t") {
            # Where the tab lands once the pending spaces are counted,
            # and how many tab stops lie between here and there.
            my $npos = ($pos+$nsp+8) & ~7;
            my $ntab = ($npos >> 3) - ($pos >> 3);
            $lo .= "\t" x $ntab;
            $pos = $npos;
            $nsp = 0;           # The pending spaces are swallowed
        } elsif ($c eq "\n" || $c eq "\r") {
            # Line terminator: flush pending spaces, restart at column 0
            $lo .= " " x $nsp;
            $nsp = 0;
            $lo .= $c;
            $pos = 0;
        } elsif ($c eq " ") {
            # Hold the space back until we know whether a tab follows
            $nsp++;
        } else {
            # Ordinary character: the pending spaces were significant
            $lo .= " " x $nsp;
            $pos += $nsp;
            $nsp = 0;
            $lo .= $c;
            $pos++;
        }
    }

    # Spaces at the very end of the string are kept as they were
    $lo .= " " x $nsp;
    return $lo;
}
cb3ed5b7e scripts: Make cle... |
51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 |
# Compute the visual width of a string
#
# Takes one string and returns the number of columns occupied by its
# widest line.  A tab advances to the next multiple of 8, a newline ends
# the current line and restarts at column 0, and every other character
# (spaces included) counts as one column.
sub strwidth($) {
    no bytes;                   # Tab alignment depends on characters

    my($li) = @_;
    my $pos  = 0;               # Column reached on the current line
    my $mlen = 0;               # Widest line seen so far

    for my $c (split(//, $li)) {
        if ($c eq "\t") {
            $pos = ($pos+8) & ~7;
        } elsif ($c eq "\n") {
            # End of a line: record its width and start the next one
            $mlen = $pos if ($pos > $mlen);
            $pos = 0;
        } else {
            $pos++;
        }
    }

    # Account for a final line that has no terminating newline
    $mlen = $pos if ($pos > $mlen);
    return $mlen;
}
12b315603 cleanfile: a scri... |
76 |
# Main program.
#
# Usage: cleanfile [-width #] files...
#
# Every named file is cleaned in place: trailing spaces, tabs and
# carriage returns are removed from each line, space-before-tab
# sequences are normalised by clean_space_tabs(), and blank lines at the
# end of the file are dropped.  Lines wider than $max_width columns are
# reported on STDERR but not altered; a width of 0 turns the report off.
# Files that are not plain files, cannot be opened read/write, or
# contain a zero byte are reported and skipped.

my $name = basename($0);

my @files = ();

while (defined(my $arg = shift(@ARGV))) {
    if ($arg =~ /^-/) {
        # The width option needs a value; anything else starting with
        # a dash is unknown.
        if (($arg eq '-width' || $arg eq '-w') && @ARGV) {
            $max_width = shift(@ARGV)+0;
        } else {
            print STDERR "Usage: $name [-width #] files...\n";
            exit 1;
        }
    } else {
        push(@files, $arg);
    }
}

foreach my $f ( @files ) {
    print STDERR "$name: $f\n";

    if (! -f $f) {
        print STDERR "$f: not a file\n";
        next;
    }

    my $fh;
    if (!open($fh, '+<', $f)) {
        print STDERR "$name: Cannot open file: $f: $!\n";
        next;
    }

    binmode $fh;

    # First, verify that it is not a binary file; consider any file
    # with a zero byte to be a binary file.  Is there any better, or
    # additional, heuristic that should be applied?
    my $is_binary = 0;
    my $data;

    while (read($fh, $data, 65536) > 0) {
        if ($data =~ /\0/) {
            $is_binary = 1;
            last;
        }
    }

    if ($is_binary) {
        print STDERR "$name: $f: binary file\n";
        close($fh);
        next;
    }

    seek($fh, 0, 0);

    my @blanks  = ();           # Blank lines held back until text follows
    my @lines   = ();           # Cleaned lines that will be written out
    my $lineno  = 0;
    my $changed = 0;            # True once the output differs from the input

    while ( defined(my $line = <$fh>) ) {
        $lineno++;
        my $orig = $line;
        $line =~ s/[ \t\r]*$//;         # Remove trailing spaces
        $line = clean_space_tabs($line);

        # Compare contents, not lengths: turning " \t" into "\t\t"
        # changes the line without changing its size.
        $changed = 1 if ($line ne $orig);

        if ( $line eq "\n" ) {
            push(@blanks, $line);
        } else {
            push(@lines, @blanks, $line);
            @blanks = ();
        }

        my $l_width = strwidth($line);
        if ($max_width && $l_width > $max_width) {
            print STDERR
                "$f:$lineno: line exceeds $max_width characters ($l_width)\n";
        }
    }

    # Any blanks at the end of the file are discarded
    $changed = 1 if (@blanks);

    if ($changed) {
        # Only write to the file if changed
        seek($fh, 0, 0);
        if (!print $fh @lines) {
            die "$name: Failed to write modified file: $f: $!\n";
        }

        # tell() reports failure as -1, so check for that as well
        my $where = tell($fh);
        if ( !defined($where) || $where < 0 ||
             !truncate($fh, $where) ) {
            die "$name: Failed to truncate modified file: $f: $!\n";
        }
    }

    # Buffered write errors only show up when the handle is closed
    if (!close($fh)) {
        print STDERR "$name: Error closing file: $f: $!\n";
    }
}