Blame view
scripts/cleanfile
3.46 KB
cb77f0d62 scripts: Switch t... |
1 |
#!/usr/bin/env perl |
b24413180 License cleanup: ... |
2 |
# SPDX-License-Identifier: GPL-2.0 |
12b315603 cleanfile: a scri... |
3 4 5 6 |
#
# Clean a text file -- or directory of text files -- of stealth whitespace.
# WARNING: this can be a highly destructive operation.  Use with caution.
#
cb77f0d62 scripts: Switch t... |
7 |
use warnings; |
12b315603 cleanfile: a scri... |
8 9 |
use bytes; use File::Basename; |
cb3ed5b7e scripts: Make cle... |
10 11 |
# Default options

# Widest visual line width accepted before a warning is printed for that
# line.  The -width / -w command-line option overrides it, and a value of
# 0 disables the check.  Declared with "our" so it remains the same
# package variable that the rest of the script reads and assigns.
our $max_width = 79;
12b315603 cleanfile: a scri... |
12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 |
# Clean up space-tab sequences, either by removing spaces or
# replacing them with tabs.
#
# Takes one string and returns the cleaned copy.  Runs of spaces are
# held back (counted in $pending) rather than emitted immediately:
#   - if a tab follows, the spaces are dropped and just enough tabs are
#     emitted to reach the tab stop the original text would have reached;
#   - if anything else follows, the spaces are emitted unchanged.
# Tab stops are every 8 columns.  A newline or carriage return resets
# the column counter to 0.
sub clean_space_tabs($)
{
    no bytes;			# Tab alignment depends on characters

    my ($li) = @_;
    my $lo      = '';		# Output built so far
    my $pos     = 0;		# Column of the end of $lo on the current line
    my $pending = 0;		# Spaces seen but not yet emitted

    for my $i (0 .. length($li) - 1) {
        my $c = substr($li, $i, 1);

        if ($c eq "\t") {
            # Column the tab lands on once the pending spaces are counted,
            # rounded up to the next multiple of 8.
            my $npos = ($pos + $pending + 8) & ~7;
            # Number of tab stops crossed between $pos and $npos.
            my $ntab = ($npos >> 3) - ($pos >> 3);
            $lo .= "\t" x $ntab;
            $pos = $npos;
            $pending = 0;
        } elsif ($c eq "\n" || $c eq "\r") {
            # End of line: flush pending spaces, emit the terminator and
            # restart column counting.
            $lo .= " " x $pending;
            $pending = 0;
            $lo .= $c;
            $pos = 0;
        } elsif ($c eq " ") {
            # Defer the decision until the next non-space character.
            $pending++;
        } else {
            $lo .= " " x $pending;
            $pos += $pending;
            $pending = 0;
            $lo .= $c;
            $pos++;
        }
    }

    # Spaces at the very end of the input are kept as they were.
    $lo .= " " x $pending;
    return $lo;
}
cb3ed5b7e scripts: Make cle... |
52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
# Compute the visual width of a string
#
# Takes one string and returns the largest column reached on any of its
# lines.  A tab advances to the next multiple of 8, a newline ends the
# current line and restarts counting at column 0, and every other
# character (including a space) advances one column.
sub strwidth($) {
    no bytes;			# Tab alignment depends on characters

    my ($li) = @_;
    my $pos  = 0;		# Column on the current line
    my $mlen = 0;		# Widest line seen so far

    for my $i (0 .. length($li) - 1) {
        my $c = substr($li, $i, 1);

        if ($c eq "\t") {
            $pos = ($pos + 8) & ~7;
        } elsif ($c eq "\n") {
            $mlen = $pos if ($pos > $mlen);
            $pos = 0;
        } else {
            $pos++;
        }
    }

    # Account for a final line that has no terminating newline.
    $mlen = $pos if ($pos > $mlen);
    return $mlen;
}
12b315603 cleanfile: a scri... |
77 |
# Main driver: parse the command line, then clean each named file in place.
#
# Usage: cleanfile [-width #] files...
#
# For every regular, non-binary file this strips trailing whitespace from
# each line, normalizes space-tab sequences with clean_space_tabs(), drops
# blank lines at the end of the file, and warns on STDERR about lines
# wider than $max_width.  The file is rewritten only if its byte count
# changed.

my $name = basename($0);	# Program name, used as the diagnostic prefix

my @files = ();

# Anything beginning with '-' is an option; everything else is a file name.
while (defined(my $arg = shift(@ARGV))) {
    if ($arg =~ /^-/) {
        if ($arg eq '-width' || $arg eq '-w') {
            $max_width = shift(@ARGV)+0;
        } else {
            print STDERR "Usage: $name [-width #] files...\n";
            exit 1;
        }
    } else {
        push(@files, $arg);
    }
}

foreach my $f ( @files ) {
    print STDERR "$name: $f\n";

    if (! -f $f) {
        print STDERR "$f: not a file\n";
        next;
    }

    # A lexical handle is closed automatically when it goes out of scope,
    # so the early "next" paths below do not leave the file open.
    my $fh;
    if (!open($fh, '+<', $f)) {
        print STDERR "$name: Cannot open file: $f: $!\n";
        next;
    }

    binmode $fh;

    # First, verify that it is not a binary file; consider any file
    # with a zero byte to be a binary file.  Is there any better, or
    # additional, heuristic that should be applied?
    my $is_binary = 0;
    my $data;

    # read() returns 0 at end of file and undef on error; stop on either.
    while (read($fh, $data, 65536)) {
        if ($data =~ /\0/) {
            $is_binary = 1;
            last;
        }
    }

    if ($is_binary) {
        print STDERR "$name: $f: binary file\n";
        next;
    }

    seek($fh, 0, 0);

    my $in_bytes    = 0;	# Bytes read from the file
    my $out_bytes   = 0;	# Bytes that will be written back
    my $blank_bytes = 0;	# Bytes held in @blanks

    my @blanks = ();		# Blank lines not yet known to be interior
    my @lines  = ();		# Cleaned output lines
    my $lineno = 0;

    while ( defined(my $line = <$fh>) ) {
        $lineno++;
        $in_bytes += length($line);
        $line =~ s/[ \t\r]*$//;		# Remove trailing spaces
        $line = clean_space_tabs($line);

        if ( $line eq "\n" ) {
            # Hold blank lines back: they are only kept if a non-blank
            # line follows them.
            push(@blanks, $line);
            $blank_bytes += length($line);
        } else {
            push(@lines, @blanks);
            $out_bytes += $blank_bytes;
            push(@lines, $line);
            $out_bytes += length($line);
            @blanks = ();
            $blank_bytes = 0;
        }

        my $l_width = strwidth($line);
        if ($max_width && $l_width > $max_width) {
            print STDERR
                "$f:$lineno: line exceeds $max_width characters ($l_width)\n";
        }
    }

    # Any blanks at the end of the file are discarded

    if ($in_bytes != $out_bytes) {
        # Only write to the file if changed
        seek($fh, 0, 0);
        print {$fh} @lines
            or die "$name: Failed to write modified file: $f: $!\n";

        # tell() reports failure as -1, so test for that as well as undef.
        my $where = tell($fh);
        if ( !defined($where) || $where < 0 ||
             !truncate($fh, $where) ) {
            die "$name: Failed to truncate modified file: $f: $!\n";
        }

        # Buffered write errors may only surface at close time.
        close($fh)
            or die "$name: Failed to close modified file: $f: $!\n";
    } else {
        close($fh);
    }
}