1 /*        $NetBSD: col.c,v 1.20 2021/09/10 21:52:17 rillig Exp $      */
2 
3 /*-
4  * SPDX-License-Identifier: BSD-3-Clause
5  *
6  * Copyright (c) 1990, 1993, 1994
7  *        The Regents of the University of California.  All rights reserved.
8  *
9  * This code is derived from software contributed to Berkeley by
10  * Michael Rendell of the Memorial University of Newfoundland.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  * 3. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  */
36 
37 #include <sys/cdefs.h>
38 #ifndef lint
39 __COPYRIGHT("@(#) Copyright (c) 1990, 1993, 1994\
40  The Regents of the University of California.  All rights reserved.");
41 #endif /* not lint */
42 
43 #ifndef lint
44 #if 0
45 static char sccsid[] = "@(#)col.c       8.5 (Berkeley) 5/4/95";
46 __FBSDID("$FreeBSD: head/usr.bin/col/col.c 366577 2020-10-09 15:27:37Z markj $")
47 ;
48 
49 #endif
50 __RCSID("$NetBSD: col.c,v 1.20 2021/09/10 21:52:17 rillig Exp $");
51 #endif /* not lint */
52 
53 #include <err.h>
54 #include <errno.h>
55 #include <inttypes.h>
56 #include <limits.h>
57 #include <locale.h>
58 #include <stdio.h>
59 #include <stdlib.h>
60 #include <string.h>
61 #include <termios.h>
62 #include <unistd.h>
63 #include <wchar.h>
64 #include <wctype.h>
65 
66 #define   BS        '\b'                /* backspace */
67 #define   TAB       '\t'                /* tab */
68 #define   SPACE     ' '                 /* space */
69 #define   NL        '\n'                /* newline */
70 #define   CR        '\r'                /* carriage return */
71 #define   ESC       '\033'              /* escape */
72 #define   SI        '\017'              /* shift in to normal character set */
73 #define   SO        '\016'              /* shift out to alternate character set */
74 #define   VT        '\013'              /* vertical tab (aka reverse line feed) */
75 #define   RLF       '7'                 /* ESC-7 reverse line feed */
76 #define   RHLF      '8'                 /* ESC-8 reverse half-line feed */
77 #define   FHLF      '9'                 /* ESC-9 forward half-line feed */
78 
79 /* build up at least this many lines before flushing them out */
80 #define   BUFFER_MARGIN                 32
81 
82 typedef char CSET;
83 
84 typedef struct char_str {
85 #define   CS_NORMAL 1
86 #define   CS_ALTERNATE        2
87           int                 c_column; /* column character is in */
88           CSET                c_set;              /* character set (currently only 2) */
89           wchar_t             c_char;             /* character in question */
90           int                 c_width;  /* character width */
91 } CHAR;
92 
93 typedef struct line_str LINE;
94 struct line_str {
95           CHAR      *l_line;            /* characters on the line */
96           LINE      *l_prev;            /* previous line */
97           LINE      *l_next;            /* next line */
98           int       l_lsize;            /* allocated sizeof l_line */
99           int       l_line_len;                   /* strlen(l_line) */
100           int       l_needs_sort;                 /* set if chars went in out of order */
101           int       l_max_col;                    /* max column in the line */
102 };
103 
104 static void         addto_lineno(int *, int);
105 static LINE         *alloc_line(void);
106 static void         dowarn(int);
107 static void         flush_line(LINE *);
108 static void         flush_lines(int);
109 static void         flush_blanks(void);
110 static void         free_line(LINE *);
111 __dead static void  usage(void);
112 
113 static CSET         last_set;           /* char_set of last char printed */
114 static LINE         *lines;
115 static int          compress_spaces;    /* if doing space -> tab conversion */
116 static int          fine;                         /* if `fine' resolution (half lines) */
117 static int          max_bufd_lines;               /* max # of half lines to keep in memory */
118 static int          nblank_lines;                 /* # blanks after last flushed line */
119 static int          no_backspaces;                /* if not to output any backspaces */
120 static int          pass_unknown_seqs;  /* pass unknown control sequences */
121 
122 #define   PUTC(ch) \
123           do {                                              \
124                     if (putwchar(ch) == WEOF)     \
125                               errx(EXIT_FAILURE, "write error");      \
126           } while (0)
127 
128 int
main(int argc,char ** argv)129 main(int argc, char **argv)
130 {
131           wint_t ch;
132           CHAR *c;
133           CSET cur_set;                           /* current character set */
134           LINE *l;                      /* current line */
135           int extra_lines;              /* # of lines above first line */
136           int cur_col;                            /* current column */
137           int cur_line;                           /* line number of current position */
138           int max_line;                           /* max value of cur_line */
139           int this_line;                          /* line l points to */
140           int nflushd_lines;            /* number of lines that were flushed */
141           int adjust, opt, warned, width;
142           int e;
143 
144           (void)setlocale(LC_CTYPE, "");
145 
146           max_bufd_lines = 256;
147           compress_spaces = 1;                    /* compress spaces into tabs */
148           while ((opt = getopt(argc, argv, "bfhl:px")) != -1)
149                     switch (opt) {
150                     case 'b':           /* do not output backspaces */
151                               no_backspaces = 1;
152                               break;
153                     case 'f':           /* allow half forward line feeds */
154                               fine = 1;
155                               break;
156                     case 'h':           /* compress spaces into tabs */
157                               compress_spaces = 1;
158                               break;
159                     case 'l':           /* buffered line count */
160                               max_bufd_lines = (int)strtoi(optarg, NULL, 0, 1,
161                                   (INT_MAX - BUFFER_MARGIN) / 2, &e) * 2;
162                               if (e)
163                                         errc(EXIT_FAILURE, e, "bad -l argument `%s'",
164                                             optarg);
165                               break;
166                     case 'p':           /* pass unknown control sequences */
167                               pass_unknown_seqs = 1;
168                               break;
169                     case 'x':           /* do not compress spaces into tabs */
170                               compress_spaces = 0;
171                               break;
172                     case '?':
173                     default:
174                               usage();
175                     }
176 
177           if (optind != argc)
178                     usage();
179 
180           adjust = cur_col = extra_lines = warned = 0;
181           cur_line = max_line = nflushd_lines = this_line = 0;
182           cur_set = last_set = CS_NORMAL;
183           lines = l = alloc_line();
184 
185           while ((ch = getwchar()) != WEOF) {
186                     if (!iswgraph(ch)) {
187                               switch (ch) {
188                               case BS:            /* can't go back further */
189                                         if (cur_col == 0)
190                                                   continue;
191                                         --cur_col;
192                                         continue;
193                               case CR:
194                                         cur_col = 0;
195                                         continue;
196                               case ESC:           /* just ignore EOF */
197                                         switch(getwchar()) {
198                                         /*
199                                          * In the input stream, accept both the
200                                          * XPG5 sequences ESC-digit and the
201                                          * traditional BSD sequences ESC-ctrl.
202                                          */
203                                         case '\007':
204                                                   /* FALLTHROUGH */
205                                         case RLF:
206                                                   addto_lineno(&cur_line, -2);
207                                                   break;
208                                         case '\010':
209                                                   /* FALLTHROUGH */
210                                         case RHLF:
211                                                   addto_lineno(&cur_line, -1);
212                                                   break;
213                                         case '\011':
214                                                   /* FALLTHROUGH */
215                                         case FHLF:
216                                                   addto_lineno(&cur_line, 1);
217                                                   if (cur_line > max_line)
218                                                             max_line = cur_line;
219                                         }
220                                         continue;
221                               case NL:
222                                         addto_lineno(&cur_line, 2);
223                                         if (cur_line > max_line)
224                                                   max_line = cur_line;
225                                         cur_col = 0;
226                                         continue;
227                               case SPACE:
228                                         ++cur_col;
229                                         continue;
230                               case SI:
231                                         cur_set = CS_NORMAL;
232                                         continue;
233                               case SO:
234                                         cur_set = CS_ALTERNATE;
235                                         continue;
236                               case TAB:           /* adjust column */
237                                         cur_col |= 7;
238                                         ++cur_col;
239                                         continue;
240                               case VT:
241                                         addto_lineno(&cur_line, -2);
242                                         continue;
243                               }
244                               if (iswspace(ch)) {
245                                         if ((width = wcwidth(ch)) > 0)
246                                                   cur_col += width;
247                                         continue;
248                               }
249                               if (!pass_unknown_seqs)
250                                         continue;
251                     }
252 
253                     /* Must stuff ch in a line - are we at the right one? */
254                     if (cur_line + adjust != this_line) {
255                               LINE *lnew;
256 
257                               /* round up to next line */
258                               adjust = !fine && (cur_line & 1);
259 
260                               if (cur_line + adjust < this_line) {
261                                         while (cur_line + adjust < this_line &&
262                                             l->l_prev != NULL) {
263                                                   l = l->l_prev;
264                                                   this_line--;
265                                         }
266                                         if (cur_line + adjust < this_line) {
267                                                   if (nflushd_lines == 0) {
268                                                             /*
269                                                              * Allow backup past first
270                                                              * line if nothing has been
271                                                              * flushed yet.
272                                                              */
273                                                             while (cur_line + adjust
274                                                                 < this_line) {
275                                                                       lnew = alloc_line();
276                                                                       l->l_prev = lnew;
277                                                                       lnew->l_next = l;
278                                                                       l = lines = lnew;
279                                                                       extra_lines++;
280                                                                       this_line--;
281                                                             }
282                                                   } else {
283                                                             if (!warned++)
284                                                                       dowarn(cur_line);
285                                                             cur_line = this_line - adjust;
286                                                   }
287                                         }
288                               } else {
289                                         /* may need to allocate here */
290                                         while (cur_line + adjust > this_line) {
291                                                   if (l->l_next == NULL) {
292                                                             l->l_next = alloc_line();
293                                                             l->l_next->l_prev = l;
294                                                   }
295                                                   l = l->l_next;
296                                                   this_line++;
297                                         }
298                               }
299                               if (this_line > nflushd_lines &&
300                                   this_line - nflushd_lines >=
301                                   max_bufd_lines + BUFFER_MARGIN) {
302                                         if (extra_lines) {
303                                                   flush_lines(extra_lines);
304                                                   extra_lines = 0;
305                                         }
306                                         flush_lines(this_line - nflushd_lines -
307                                             max_bufd_lines);
308                                         nflushd_lines = this_line - max_bufd_lines;
309                               }
310                     }
311                     /* grow line's buffer? */
312                     if (l->l_line_len + 1 >= l->l_lsize) {
313                               int need;
314 
315                               need = l->l_lsize ? l->l_lsize * 2 : 90;
316                               if ((l->l_line = realloc(l->l_line,
317                                   (unsigned)need * sizeof(CHAR))) == NULL)
318                                         err(EXIT_FAILURE, NULL);
319                               l->l_lsize = need;
320                     }
321                     c = &l->l_line[l->l_line_len++];
322                     c->c_char = ch;
323                     c->c_set = cur_set;
324                     c->c_column = cur_col;
325                     c->c_width = wcwidth(ch);
326                     /*
327                      * If things are put in out of order, they will need sorting
328                      * when it is flushed.
329                      */
330                     if (cur_col < l->l_max_col)
331                               l->l_needs_sort = 1;
332                     else
333                               l->l_max_col = cur_col;
334                     if (c->c_width > 0)
335                               cur_col += c->c_width;
336           }
337           if (ferror(stdin))
338                     err(EXIT_FAILURE, NULL);
339           if (extra_lines) {
340                     /*
341                      * Extra lines only exist if no lines have been flushed
342                      * yet. This means that 'lines' must point to line zero
343                      * after we flush the extra lines.
344                      */
345                     flush_lines(extra_lines);
346                     l = lines;
347                     this_line = 0;
348           }
349 
350           /* goto the last line that had a character on it */
351           for (; l->l_next; l = l->l_next)
352                     this_line++;
353           flush_lines(this_line - nflushd_lines + 1);
354 
355           /* make sure we leave things in a sane state */
356           if (last_set != CS_NORMAL)
357                     PUTC(SI);
358 
359           /* flush out the last few blank lines */
360           if (max_line >= this_line)
361                     nblank_lines = max_line - this_line + (max_line & 1);
362           if (nblank_lines == 0)
363                     /* end with a newline even if the source doesn't */
364                     nblank_lines = 2;
365           flush_blanks();
366           exit(EXIT_SUCCESS);
367 }
368 
369 /*
370  * Prints the first 'nflush' lines. Printed lines are freed.
371  * After this function returns, 'lines' points to the first
372  * of the remaining lines, and 'nblank_lines' will have the
373  * number of half line feeds between the final flushed line
374  * and the first remaining line.
375  */
376 static void
flush_lines(int nflush)377 flush_lines(int nflush)
378 {
379           LINE *l;
380 
381           while (--nflush >= 0) {
382                     l = lines;
383                     lines = l->l_next;
384                     if (l->l_line) {
385                               flush_blanks();
386                               flush_line(l);
387                               free(l->l_line);
388                     }
389                     if (l->l_next)
390                               nblank_lines++;
391                     free_line(l);
392           }
393           if (lines)
394                     lines->l_prev = NULL;
395 }
396 
397 /*
398  * Print a number of newline/half newlines.
399  * nblank_lines is the number of half line feeds.
400  */
401 static void
flush_blanks(void)402 flush_blanks(void)
403 {
404           int half, i, nb;
405 
406           half = 0;
407           nb = nblank_lines;
408           if (nb & 1) {
409                     if (fine)
410                               half = 1;
411                     else
412                               nb++;
413           }
414           nb /= 2;
415           for (i = nb; --i >= 0;)
416                     PUTC('\n');
417           if (half) {
418                     PUTC(ESC);
419                     PUTC(FHLF);
420                     if (!nb)
421                               PUTC('\r');
422           }
423           nblank_lines = 0;
424 }
425 
426 /*
427  * Write a line to stdout taking care of space to tab conversion (-h flag)
428  * and character set shifts.
429  */
430 static void
flush_line(LINE * l)431 flush_line(LINE *l)
432 {
433           CHAR *c, *endc;
434           int i, j, nchars, last_col, save, this_col, tot;
435 
436           last_col = 0;
437           nchars = l->l_line_len;
438 
439           if (l->l_needs_sort) {
440                     static CHAR *sorted;
441                     static int count_size, *count, sorted_size;
442 
443                     /*
444                      * Do an O(n) sort on l->l_line by column being careful to
445                      * preserve the order of characters in the same column.
446                      */
447                     if (l->l_lsize > sorted_size) {
448                               sorted_size = l->l_lsize;
449                               if ((sorted = realloc(sorted,
450                                   sizeof(CHAR) * (size_t)sorted_size)) == NULL)
451                                         err(EXIT_FAILURE, NULL);
452                     }
453                     if (l->l_max_col >= count_size) {
454                               count_size = l->l_max_col + 1;
455                               if ((count = realloc(count,
456                                   sizeof(int) * (size_t)count_size)) == NULL)
457                                         err(EXIT_FAILURE, NULL);
458                     }
459                     memset(count, 0, sizeof(int) * (size_t)l->l_max_col + 1);
460                     for (i = nchars, c = l->l_line; --i >= 0; c++)
461                               count[c->c_column]++;
462 
463                     /*
464                      * calculate running total (shifted down by 1) to use as
465                      * indices into new line.
466                      */
467                     for (tot = 0, i = 0; i <= l->l_max_col; i++) {
468                               save = count[i];
469                               count[i] = tot;
470                               tot += save;
471                     }
472 
473                     for (i = nchars, c = l->l_line; --i >= 0; c++)
474                               sorted[count[c->c_column]++] = *c;
475                     c = sorted;
476           } else
477                     c = l->l_line;
478           while (nchars > 0) {
479                     this_col = c->c_column;
480                     endc = c;
481                     do {
482                               ++endc;
483                     } while (--nchars > 0 && this_col == endc->c_column);
484 
485                     /* if -b only print last character */
486                     if (no_backspaces) {
487                               c = endc - 1;
488                               if (nchars > 0 &&
489                                   this_col + c->c_width > endc->c_column)
490                                         continue;
491                     }
492 
493                     if (this_col > last_col) {
494                               int nspace = this_col - last_col;
495 
496                               if (compress_spaces && nspace > 1) {
497                                         while (1) {
498                                                   int tab_col, tab_size;
499 
500                                                   tab_col = (last_col + 8) & ~7;
501                                                   if (tab_col > this_col)
502                                                             break;
503                                                   tab_size = tab_col - last_col;
504                                                   if (tab_size == 1)
505                                                             PUTC(' ');
506                                                   else
507                                                             PUTC('\t');
508                                                   nspace -= tab_size;
509                                                   last_col = tab_col;
510                                         }
511                               }
512                               while (--nspace >= 0)
513                                         PUTC(' ');
514                               last_col = this_col;
515                     }
516 
517                     for (;;) {
518                               if (c->c_set != last_set) {
519                                         switch (c->c_set) {
520                                         case CS_NORMAL:
521                                                   PUTC(SI);
522                                                   break;
523                                         case CS_ALTERNATE:
524                                                   PUTC(SO);
525                                         }
526                                         last_set = c->c_set;
527                               }
528                               PUTC(c->c_char);
529                               if ((c + 1) < endc)
530                                         for (j = 0; j < c->c_width; j++)
531                                                   PUTC('\b');
532                               if (++c >= endc)
533                                         break;
534                     }
535                     last_col += (c - 1)->c_width;
536           }
537 }
538 
539 /*
540  * Increment or decrement a line number, checking for overflow.
541  * Stop one below INT_MAX such that the adjust variable is safe.
542  */
543 void
addto_lineno(int * lno,int offset)544 addto_lineno(int *lno, int offset)
545 {
546           if (offset > 0) {
547                     if (*lno >= INT_MAX - offset)
548                               errx(EXIT_FAILURE, "too many lines");
549           } else {
550                     if (*lno < INT_MIN - offset)
551                               errx(EXIT_FAILURE, "too many reverse line feeds");
552           }
553           *lno += offset;
554 }
555 
556 #define   NALLOC 64
557 
558 static LINE *line_freelist;
559 
560 static LINE *
alloc_line(void)561 alloc_line(void)
562 {
563           LINE *l;
564           int i;
565 
566           if (!line_freelist) {
567                     if ((l = realloc(NULL, sizeof(LINE) * NALLOC)) == NULL)
568                               err(EXIT_FAILURE, NULL);
569                     line_freelist = l;
570                     for (i = 1; i < NALLOC; i++, l++)
571                               l->l_next = l + 1;
572                     l->l_next = NULL;
573           }
574           l = line_freelist;
575           line_freelist = l->l_next;
576 
577           memset(l, 0, sizeof(LINE));
578           return (l);
579 }
580 
581 static void
free_line(LINE * l)582 free_line(LINE *l)
583 {
584 
585           l->l_next = line_freelist;
586           line_freelist = l;
587 }
588 
589 static void
usage(void)590 usage(void)
591 {
592 
593           (void)fprintf(stderr, "Usage: %s [-bfhpx] [-l nline]\n", getprogname());
594           exit(EXIT_FAILURE);
595 }
596 
597 static void
dowarn(int line)598 dowarn(int line)
599 {
600 
601           warnx("warning: can't back up %s",
602                     line < 0 ? "past first line" : "-- line already flushed");
603 }
604