Commit 3cbea4366f17dcb22f3bf5f253eeb86b622d24d0

Authored by Tony Finch
Committed by Michal Marek
1 parent c56eb8fb6d

unifdef: update to upstream version 2.5

Fix a long-standing cpp compatibility bug. The -DFOO argument
(without an explicit value) should define FOO to 1 not to the empty
string.

Add a -o option to support overwriting a file in place, and a -S
option to list the nesting depth of symbols. Include line numbers
in debugging output. Support CRLF newlines.

Signed-off-by: Tony Finch <dot@dotat.at>
Signed-off-by: Michal Marek <mmarek@suse.cz>

Showing 1 changed file with 184 additions and 63 deletions Side-by-side Diff

1 1 /*
2   - * Copyright (c) 2002 - 2009 Tony Finch <dot@dotat.at>
  2 + * Copyright (c) 2002 - 2011 Tony Finch <dot@dotat.at>
3 3 *
4 4 * Redistribution and use in source and binary forms, with or without
5 5 * modification, are permitted provided that the following conditions
6 6  
... ... @@ -24,23 +24,14 @@
24 24 */
25 25  
26 26 /*
  27 + * unifdef - remove ifdef'ed lines
  28 + *
27 29 * This code was derived from software contributed to Berkeley by Dave Yost.
28 30 * It was rewritten to support ANSI C by Tony Finch. The original version
29 31 * of unifdef carried the 4-clause BSD copyright licence. None of its code
30 32 * remains in this version (though some of the names remain) so it now
31 33 * carries a more liberal licence.
32 34 *
33   - * The latest version is available from http://dotat.at/prog/unifdef
34   - */
35   -
36   -static const char * const copyright[] = {
37   - "@(#) Copyright (c) 2002 - 2009 Tony Finch <dot@dotat.at>\n",
38   - "$dotat: unifdef/unifdef.c,v 1.190 2009/11/27 17:21:26 fanf2 Exp $",
39   -};
40   -
41   -/*
42   - * unifdef - remove ifdef'ed lines
43   - *
44 35 * Wishlist:
45 36 * provide an option which will append the name of the
46 37 * appropriate symbol after #else's and #endif's
47 38  
48 39  
... ... @@ -48,12 +39,16 @@
48 39 * #else's and #endif's to see that they match their
49 40 * corresponding #ifdef or #ifndef
50 41 *
51   - * The first two items above require better buffer handling, which would
52   - * also make it possible to handle all "dodgy" directives correctly.
  42 + * These require better buffer handling, which would also make
  43 + * it possible to handle all "dodgy" directives correctly.
53 44 */
54 45  
  46 +#include <sys/types.h>
  47 +#include <sys/stat.h>
  48 +
55 49 #include <ctype.h>
56 50 #include <err.h>
  51 +#include <errno.h>
57 52 #include <stdarg.h>
58 53 #include <stdbool.h>
59 54 #include <stdio.h>
... ... @@ -61,6 +56,12 @@
61 56 #include <string.h>
62 57 #include <unistd.h>
63 58  
  59 +const char copyright[] =
  60 + "@(#) $Version: unifdef-2.5 $\n"
  61 + "@(#) $Author: Tony Finch (dot@dotat.at) $\n"
  62 + "@(#) $URL: http://dotat.at/prog/unifdef $\n"
  63 +;
  64 +
64 65 /* types of input lines: */
65 66 typedef enum {
66 67 LT_TRUEI, /* a true #if with ignore flag */
... ... @@ -153,6 +154,11 @@
153 154 #define EDITSLOP 10
154 155  
155 156 /*
  157 + * For temporary filenames
  158 + */
  159 +#define TEMPLATE "unifdef.XXXXXX"
  160 +
  161 +/*
156 162 * Globals.
157 163 */
158 164  
... ... @@ -165,6 +171,7 @@
165 171 static bool killconsts; /* -k: eval constant #ifs */
166 172 static bool lnnum; /* -n: add #line directives */
167 173 static bool symlist; /* -s: output symbol list */
  174 +static bool symdepth; /* -S: output symbol depth */
168 175 static bool text; /* -t: this is a text file */
169 176  
170 177 static const char *symname[MAXSYMS]; /* symbol name */
171 178  
... ... @@ -175,10 +182,18 @@
175 182 static FILE *input; /* input file pointer */
176 183 static const char *filename; /* input file name */
177 184 static int linenum; /* current line number */
  185 +static FILE *output; /* output file pointer */
  186 +static const char *ofilename; /* output file name */
  187 +static bool overwriting; /* output overwrites input */
  188 +static char tempname[FILENAME_MAX]; /* used when overwriting */
178 189  
179 190 static char tline[MAXLINE+EDITSLOP];/* input buffer plus space */
180 191 static char *keyword; /* used for editing #elif's */
181 192  
  193 +static const char *newline; /* input file format */
  194 +static const char newline_unix[] = "\n";
  195 +static const char newline_crlf[] = "\r\n";
  196 +
182 197 static Comment_state incomment; /* comment parser state */
183 198 static Line_state linestate; /* #if line parser state */
184 199 static Ifstate ifstate[MAXDEPTH]; /* #if processor state */
185 200  
... ... @@ -189,10 +204,13 @@
189 204 static unsigned blankcount; /* count of blank lines */
190 205 static unsigned blankmax; /* maximum recent blankcount */
191 206 static bool constexpr; /* constant #if expression */
  207 +static bool zerosyms = true; /* to format symdepth output */
  208 +static bool firstsym; /* ditto */
192 209  
193 210 static int exitstat; /* program exit status */
194 211  
195 212 static void addsym(bool, bool, char *);
  213 +static void closeout(void);
196 214 static void debug(const char *, ...);
197 215 static void done(void);
198 216 static void error(const char *);
... ... @@ -212,6 +230,7 @@
212 230 static int strlcmp(const char *, const char *, size_t);
213 231 static void unnest(void);
214 232 static void usage(void);
  233 +static void version(void);
215 234  
216 235 #define endsym(c) (!isalnum((unsigned char)c) && c != '_')
217 236  
... ... @@ -223,7 +242,7 @@
223 242 {
224 243 int opt;
225 244  
226   - while ((opt = getopt(argc, argv, "i:D:U:I:BbcdeKklnst")) != -1)
  245 + while ((opt = getopt(argc, argv, "i:D:U:I:o:bBcdeKklnsStV")) != -1)
227 246 switch (opt) {
228 247 case 'i': /* treat stuff controlled by these symbols as text */
229 248 /*
230 249  
231 250  
... ... @@ -245,16 +264,15 @@
245 264 case 'U': /* undef a symbol */
246 265 addsym(false, false, optarg);
247 266 break;
248   - case 'I':
249   - /* no-op for compatibility with cpp */
  267 + case 'I': /* no-op for compatibility with cpp */
250 268 break;
251   - case 'B': /* compress blank lines around removed section */
252   - compblank = true;
253   - break;
254 269 case 'b': /* blank deleted lines instead of omitting them */
255 270 case 'l': /* backwards compatibility */
256 271 lnblank = true;
257 272 break;
  273 + case 'B': /* compress blank lines around removed section */
  274 + compblank = true;
  275 + break;
258 276 case 'c': /* treat -D as -U and vice versa */
259 277 complement = true;
260 278 break;
261 279  
262 280  
... ... @@ -273,12 +291,20 @@
273 291 case 'n': /* add #line directive after deleted lines */
274 292 lnnum = true;
275 293 break;
  294 + case 'o': /* output to a file */
  295 + ofilename = optarg;
  296 + break;
276 297 case 's': /* only output list of symbols that control #ifs */
277 298 symlist = true;
278 299 break;
  300 + case 'S': /* list symbols with their nesting depth */
  301 + symlist = symdepth = true;
  302 + break;
279 303 case 't': /* don't parse C comments */
280 304 text = true;
281 305 break;
  306 + case 'V': /* print version */
  307 + version();
282 308 default:
283 309 usage();
284 310 }
285 311  
286 312  
287 313  
... ... @@ -290,21 +316,68 @@
290 316 errx(2, "can only do one file");
291 317 } else if (argc == 1 && strcmp(*argv, "-") != 0) {
292 318 filename = *argv;
293   - input = fopen(filename, "r");
  319 + input = fopen(filename, "rb");
294 320 if (input == NULL)
295 321 err(2, "can't open %s", filename);
296 322 } else {
297 323 filename = "[stdin]";
298 324 input = stdin;
299 325 }
  326 + if (ofilename == NULL) {
  327 + ofilename = "[stdout]";
  328 + output = stdout;
  329 + } else {
  330 + struct stat ist, ost;
  331 + if (stat(ofilename, &ost) == 0 &&
  332 + fstat(fileno(input), &ist) == 0)
  333 + overwriting = (ist.st_dev == ost.st_dev
  334 + && ist.st_ino == ost.st_ino);
  335 + if (overwriting) {
  336 + const char *dirsep;
  337 + int ofd;
  338 +
  339 + dirsep = strrchr(ofilename, '/');
  340 + if (dirsep != NULL)
  341 + snprintf(tempname, sizeof(tempname),
  342 + "%.*s/" TEMPLATE,
  343 + (int)(dirsep - ofilename), ofilename);
  344 + else
  345 + snprintf(tempname, sizeof(tempname),
  346 + TEMPLATE);
  347 + ofd = mkstemp(tempname);
  348 + if (ofd != -1)
  349 + output = fdopen(ofd, "wb+");
  350 + if (output == NULL)
  351 + err(2, "can't create temporary file");
  352 + fchmod(ofd, ist.st_mode & (S_IRWXU|S_IRWXG|S_IRWXO));
  353 + } else {
  354 + output = fopen(ofilename, "wb");
  355 + if (output == NULL)
  356 + err(2, "can't open %s", ofilename);
  357 + }
  358 + }
300 359 process();
301 360 abort(); /* bug */
302 361 }
303 362  
304 363 static void
  364 +version(void)
  365 +{
  366 + const char *c = copyright;
  367 + for (;;) {
  368 + while (*++c != '$')
  369 + if (*c == '\0')
  370 + exit(0);
  371 + while (*++c != '$')
  372 + putc(*c, stderr);
  373 + putc('\n', stderr);
  374 + }
  375 +}
  376 +
  377 +static void
305 378 usage(void)
306 379 {
307   - fprintf(stderr, "usage: unifdef [-BbcdeKknst] [-Ipath]"
  380 + fprintf(stderr, "usage: unifdef [-bBcdeKknsStV] [-Ipath]"
308 381 " [-Dsym[=val]] [-Usym] [-iDsym[=val]] [-iUsym] ... [file]\n");
309 382 exit(2);
310 383 }
... ... @@ -322,7 +395,8 @@
322 395 * When we have processed a group that starts off with a known-false
323 396 * #if/#elif sequence (which has therefore been deleted) followed by a
324 397 * #elif that we don't understand and therefore must keep, we edit the
325   - * latter into a #if to keep the nesting correct.
  398 + * latter into a #if to keep the nesting correct. We use strncpy() to
  399 + * overwrite the 4 byte token "elif" with "if " without a '\0' byte.
326 400 *
327 401 * When we find a true #elif in a group, the following block will
328 402 * always be kept and the rest of the sequence after the next #elif or
329 403  
... ... @@ -375,11 +449,11 @@
375 449 static void Idrop (void) { Fdrop(); ignoreon(); }
376 450 static void Itrue (void) { Ftrue(); ignoreon(); }
377 451 static void Ifalse(void) { Ffalse(); ignoreon(); }
378   -/* edit this line */
  452 +/* modify this line */
379 453 static void Mpass (void) { strncpy(keyword, "if ", 4); Pelif(); }
380   -static void Mtrue (void) { keywordedit("else\n"); state(IS_TRUE_MIDDLE); }
381   -static void Melif (void) { keywordedit("endif\n"); state(IS_FALSE_TRAILER); }
382   -static void Melse (void) { keywordedit("endif\n"); state(IS_FALSE_ELSE); }
  454 +static void Mtrue (void) { keywordedit("else"); state(IS_TRUE_MIDDLE); }
  455 +static void Melif (void) { keywordedit("endif"); state(IS_FALSE_TRAILER); }
  456 +static void Melse (void) { keywordedit("endif"); state(IS_FALSE_ELSE); }
383 457  
384 458 static state_fn * const trans_table[IS_COUNT][LT_COUNT] = {
385 459 /* IS_OUTSIDE */
... ... @@ -431,13 +505,6 @@
431 505 * State machine utility functions
432 506 */
433 507 static void
434   -done(void)
435   -{
436   - if (incomment)
437   - error("EOF in comment");
438   - exit(exitstat);
439   -}
440   -static void
441 508 ignoreoff(void)
442 509 {
443 510 if (depth == 0)
... ... @@ -452,14 +519,8 @@
452 519 static void
453 520 keywordedit(const char *replacement)
454 521 {
455   - size_t size = tline + sizeof(tline) - keyword;
456   - char *dst = keyword;
457   - const char *src = replacement;
458   - if (size != 0) {
459   - while ((--size != 0) && (*src != '\0'))
460   - *dst++ = *src++;
461   - *dst = '\0';
462   - }
  522 + snprintf(keyword, tline + sizeof(tline) - keyword,
  523 + "%s%s", replacement, newline);
463 524 print();
464 525 }
465 526 static void
466 527  
467 528  
468 529  
... ... @@ -494,24 +555,26 @@
494 555 if (symlist)
495 556 return;
496 557 if (keep ^ complement) {
497   - bool blankline = tline[strspn(tline, " \t\n")] == 'n")] == '\0';';
  558 + bool blankline = tline[strspn(tline, " \t\r\n")] == 'n")] == '\0';';
498 559 if (blankline && compblank && blankcount != blankmax) {
499 560 delcount += 1;
500 561 blankcount += 1;
501 562 } else {
502 563 if (lnnum && delcount > 0)
503   - printf("#line %d\n", linenum);
504   - fputs(tline, stdout);
  564 + printf("#line %d%s", linenum, newline);
  565 + fputs(tline, output);
505 566 delcount = 0;
506 567 blankmax = blankcount = blankline ? blankcount + 1 : 0;
507 568 }
508 569 } else {
509 570 if (lnblank)
510   - putc('\n', stdout);
  571 + fputs(newline, output);
511 572 exitstat = 1;
512 573 delcount += 1;
513 574 blankcount = 0;
514 575 }
  576 + if (debugging)
  577 + fflush(output);
515 578 }
516 579  
517 580 /*
518 581  
519 582  
520 583  
... ... @@ -520,22 +583,55 @@
520 583 static void
521 584 process(void)
522 585 {
523   - Linetype lineval;
524   -
525 586 /* When compressing blank lines, act as if the file
526 587 is preceded by a large number of blank lines. */
527 588 blankmax = blankcount = 1000;
528 589 for (;;) {
529   - linenum++;
530   - lineval = parseline();
  590 + Linetype lineval = parseline();
531 591 trans_table[ifstate[depth]][lineval]();
532   - debug("process %s -> %s depth %d",
533   - linetype_name[lineval],
  592 + debug("process line %d %s -> %s depth %d",
  593 + linenum, linetype_name[lineval],
534 594 ifstate_name[ifstate[depth]], depth);
535 595 }
536 596 }
537 597  
538 598 /*
  599 + * Flush the output and handle errors.
  600 + */
  601 +static void
  602 +closeout(void)
  603 +{
  604 + if (symdepth && !zerosyms)
  605 + printf("\n");
  606 + if (fclose(output) == EOF) {
  607 + warn("couldn't write to %s", ofilename);
  608 + if (overwriting) {
  609 + unlink(tempname);
  610 + errx(2, "%s unchanged", filename);
  611 + } else {
  612 + exit(2);
  613 + }
  614 + }
  615 +}
  616 +
  617 +/*
  618 + * Clean up and exit.
  619 + */
  620 +static void
  621 +done(void)
  622 +{
  623 + if (incomment)
  624 + error("EOF in comment");
  625 + closeout();
  626 + if (overwriting && rename(tempname, ofilename) == -1) {
  627 + warn("couldn't rename temporary file");
  628 + unlink(tempname);
  629 + errx(2, "%s unchanged", ofilename);
  630 + }
  631 + exit(exitstat);
  632 +}
  633 +
  634 +/*
539 635 * Parse a line and determine its type. We keep the preprocessor line
540 636 * parser state between calls in the global variable linestate, with
541 637 * help from skipcomment().
542 638  
543 639  
... ... @@ -549,14 +645,22 @@
549 645 Linetype retval;
550 646 Comment_state wascomment;
551 647  
  648 + linenum++;
552 649 if (fgets(tline, MAXLINE, input) == NULL)
553 650 return (LT_EOF);
  651 + if (newline == NULL) {
  652 + if (strrchr(tline, '\n') == strrchr(tline, '\r') + 1)
  653 + newline = newline_crlf;
  654 + else
  655 + newline = newline_unix;
  656 + }
554 657 retval = LT_PLAIN;
555 658 wascomment = incomment;
556 659 cp = skipcomment(tline);
557 660 if (linestate == LS_START) {
558 661 if (*cp == '#') {
559 662 linestate = LS_HASH;
  663 + firstsym = true;
560 664 cp = skipcomment(cp + 1);
561 665 } else if (*cp != '\0')
562 666 linestate = LS_DIRTY;
... ... @@ -566,7 +670,8 @@
566 670 cp = skipsym(cp);
567 671 kwlen = cp - keyword;
568 672 /* no way can we deal with a continuation inside a keyword */
569   - if (strncmp(cp, "\\\n", 2) == 0)
  673 + if (strncmp(cp, "\\\r\n", 3) == 0 ||
  674 + strncmp(cp, "\\\n", 2) == 0)
570 675 Eioccc();
571 676 if (strlcmp("ifdef", keyword, kwlen) == 0 ||
572 677 strlcmp("ifndef", keyword, kwlen) == 0) {
... ... @@ -617,9 +722,8 @@
617 722 size_t len = cp - tline;
618 723 if (fgets(tline + len, MAXLINE - len, input) == NULL) {
619 724 /* append the missing newline */
620   - tline[len+0] = '\n';
621   - tline[len+1] = '\0';
622   - cp++;
  725 + strcpy(tline + len, newline);
  726 + cp += strlen(newline);
623 727 linestate = LS_START;
624 728 } else {
625 729 linestate = LS_DIRTY;
... ... @@ -630,7 +734,7 @@
630 734 while (*cp != '\0')
631 735 cp = skipcomment(cp + 1);
632 736 }
633   - debug("parser %s comment %s line",
  737 + debug("parser line %d state %s comment %s line", linenum,
634 738 comment_name[incomment], linestate_name[linestate]);
635 739 return (retval);
636 740 }
637 741  
638 742  
... ... @@ -875,12 +979,17 @@
875 979 }
876 980 while (*cp != '\0')
877 981 /* don't reset to LS_START after a line continuation */
878   - if (strncmp(cp, "\\\n", 2) == 0)
  982 + if (strncmp(cp, "\\\r\n", 3) == 0)
  983 + cp += 3;
  984 + else if (strncmp(cp, "\\\n", 2) == 0)
879 985 cp += 2;
880 986 else switch (incomment) {
881 987 case NO_COMMENT:
882   - if (strncmp(cp, "/\\\n", 3) == 0) {
  988 + if (strncmp(cp, "/\\\r\n", 4) == 0) {
883 989 incomment = STARTING_COMMENT;
  990 + cp += 4;
  991 + } else if (strncmp(cp, "/\\\n", 3) == 0) {
  992 + incomment = STARTING_COMMENT;
884 993 cp += 3;
885 994 } else if (strncmp(cp, "/*", 2) == 0) {
886 995 incomment = C_COMMENT;
... ... @@ -899,7 +1008,7 @@
899 1008 } else if (strncmp(cp, "\n", 1) == 0) {
900 1009 linestate = LS_START;
901 1010 cp += 1;
902   - } else if (strchr(" \t", *cp) != NULL) {
  1011 + } else if (strchr(" \r\t", *cp) != NULL) {
903 1012 cp += 1;
904 1013 } else
905 1014 return (cp);
906 1015  
... ... @@ -931,8 +1040,11 @@
931 1040 cp += 1;
932 1041 continue;
933 1042 case C_COMMENT:
934   - if (strncmp(cp, "*\\\n", 3) == 0) {
  1043 + if (strncmp(cp, "*\\\r\n", 4) == 0) {
935 1044 incomment = FINISHING_COMMENT;
  1045 + cp += 4;
  1046 + } else if (strncmp(cp, "*\\\n", 3) == 0) {
  1047 + incomment = FINISHING_COMMENT;
936 1048 cp += 3;
937 1049 } else if (strncmp(cp, "*/", 2) == 0) {
938 1050 incomment = NO_COMMENT;
... ... @@ -1015,7 +1127,13 @@
1015 1127 if (cp == str)
1016 1128 return (-1);
1017 1129 if (symlist) {
1018   - printf("%.*s\n", (int)(cp-str), str);
  1130 + if (symdepth && firstsym)
  1131 + printf("%s%3d", zerosyms ? "" : "\n", depth);
  1132 + firstsym = zerosyms = false;
  1133 + printf("%s%.*s%s",
  1134 + symdepth ? " " : "",
  1135 + (int)(cp-str), str,
  1136 + symdepth ? "" : "\n");
1019 1137 /* we don't care about the value of the symbol */
1020 1138 return (0);
1021 1139 }
... ... @@ -1052,7 +1170,7 @@
1052 1170 value[symind] = val+1;
1053 1171 *val = '\0';
1054 1172 } else if (*val == '\0')
1055   - value[symind] = "";
  1173 + value[symind] = "1";
1056 1174 else
1057 1175 usage();
1058 1176 } else {
... ... @@ -1060,6 +1178,8 @@
1060 1178 usage();
1061 1179 value[symind] = NULL;
1062 1180 }
  1181 + debug("addsym %s=%s", symname[symind],
  1182 + value[symind] ? value[symind] : "undef");
1063 1183 }
1064 1184  
1065 1185 /*
... ... @@ -1100,6 +1220,7 @@
1100 1220 else
1101 1221 warnx("%s: %d: %s (#if line %d depth %d)",
1102 1222 filename, linenum, msg, stifline[depth], depth);
  1223 + closeout();
1103 1224 errx(2, "output may be truncated");
1104 1225 }