1 |
/* $MidnightBSD$ */ |
2 |
/* |
3 |
* Copyright (c) 1989, 1993 |
4 |
* The Regents of the University of California. All rights reserved. |
5 |
* |
6 |
* Redistribution and use in source and binary forms, with or without |
7 |
* modification, are permitted provided that the following conditions |
8 |
* are met: |
9 |
* 1. Redistributions of source code must retain the above copyright |
10 |
* notice, this list of conditions and the following disclaimer. |
11 |
* 2. Redistributions in binary form must reproduce the above copyright |
12 |
* notice, this list of conditions and the following disclaimer in the |
13 |
* documentation and/or other materials provided with the distribution. |
14 |
* 4. Neither the name of the University nor the names of its contributors |
15 |
* may be used to endorse or promote products derived from this software |
16 |
* without specific prior written permission. |
17 |
* |
18 |
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
19 |
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
20 |
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
21 |
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
22 |
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
23 |
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
24 |
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
25 |
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
26 |
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
27 |
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
28 |
* SUCH DAMAGE. |
29 |
*/ |
30 |
|
31 |
#ifndef lint |
32 |
#if 0 |
33 |
static char sccsid[] = "@(#)parse.c 8.1 (Berkeley) 6/6/93"; |
34 |
#endif |
35 |
#endif /* not lint */ |
36 |
#include <sys/cdefs.h> |
37 |
__FBSDID("$FreeBSD: stable/10/usr.bin/hexdump/parse.c 262926 2014-03-08 12:24:47Z brueffer $"); |
38 |
|
39 |
#include <sys/types.h> |
40 |
|
41 |
#include <err.h> |
42 |
#include <fcntl.h> |
43 |
#include <stdio.h> |
44 |
#include <stdlib.h> |
45 |
#include <ctype.h> |
46 |
#include <string.h> |
47 |
#include "hexdump.h" |
48 |
|
49 |
/*
 * Format unit to use at end-of-data; rewrite() points this at the
 * unit containing a %_A conversion.  NULL until one is seen.
 */
FU *endfu = NULL;
50 |
|
51 |
/*
 * addfile --
 *	Read format strings from the file `name', one per line, and pass
 *	each one to add().  Leading white space is skipped; empty lines and
 *	lines whose first non-blank character is '#' are ignored.  A line
 *	that does not fit in the buffer is reported with warnx() and
 *	skipped.  Exits through err() if the file cannot be opened.
 */
void
addfile(char *name)
{
	char *p, *nl;
	FILE *fp;
	int ch;
	char buf[2048 + 1];

	if ((fp = fopen(name, "r")) == NULL)
		err(1, "%s", name);
	while (fgets(buf, sizeof(buf), fp) != NULL) {
		nl = strchr(buf, '\n');
		if (nl != NULL) {
			*nl = '\0';
		} else if (strlen(buf) == sizeof(buf) - 1) {
			/*
			 * The buffer filled up before a newline was seen.
			 * Discard the remainder of this line from the format
			 * file itself (not from standard input).
			 */
			warnx("line too long");
			while ((ch = getc(fp)) != '\n' && ch != EOF)
				;
			continue;
		}
		/*
		 * Otherwise there is no newline but the buffer is not full:
		 * this is the last line of a file lacking a trailing newline,
		 * and it is processed like any other line.
		 */

		/* cast: <ctype.h> functions require unsigned char values */
		for (p = buf; *p != '\0' && isspace((unsigned char)*p); ++p)
			;
		if (*p == '\0' || *p == '#')
			continue;
		add(p);
	}
	(void)fclose(fp);
}
75 |
|
76 |
void |
77 |
add(const char *fmt) |
78 |
{ |
79 |
unsigned const char *p, *savep; |
80 |
static FS **nextfs; |
81 |
FS *tfs; |
82 |
FU *tfu, **nextfu; |
83 |
|
84 |
/* start new linked list of format units */ |
85 |
if ((tfs = calloc(1, sizeof(FS))) == NULL) |
86 |
err(1, NULL); |
87 |
if (!fshead) |
88 |
fshead = tfs; |
89 |
else |
90 |
*nextfs = tfs; |
91 |
nextfs = &tfs->nextfs; |
92 |
nextfu = &tfs->nextfu; |
93 |
|
94 |
/* take the format string and break it up into format units */ |
95 |
for (p = fmt;;) { |
96 |
/* skip leading white space */ |
97 |
for (; isspace(*p); ++p); |
98 |
if (!*p) |
99 |
break; |
100 |
|
101 |
/* allocate a new format unit and link it in */ |
102 |
if ((tfu = calloc(1, sizeof(FU))) == NULL) |
103 |
err(1, NULL); |
104 |
*nextfu = tfu; |
105 |
nextfu = &tfu->nextfu; |
106 |
tfu->reps = 1; |
107 |
|
108 |
/* if leading digit, repetition count */ |
109 |
if (isdigit(*p)) { |
110 |
for (savep = p; isdigit(*p); ++p); |
111 |
if (!isspace(*p) && *p != '/') |
112 |
badfmt(fmt); |
113 |
/* may overwrite either white space or slash */ |
114 |
tfu->reps = atoi(savep); |
115 |
tfu->flags = F_SETREP; |
116 |
/* skip trailing white space */ |
117 |
for (++p; isspace(*p); ++p); |
118 |
} |
119 |
|
120 |
/* skip slash and trailing white space */ |
121 |
if (*p == '/') |
122 |
while (isspace(*++p)); |
123 |
|
124 |
/* byte count */ |
125 |
if (isdigit(*p)) { |
126 |
for (savep = p; isdigit(*p); ++p); |
127 |
if (!isspace(*p)) |
128 |
badfmt(fmt); |
129 |
tfu->bcnt = atoi(savep); |
130 |
/* skip trailing white space */ |
131 |
for (++p; isspace(*p); ++p); |
132 |
} |
133 |
|
134 |
/* format */ |
135 |
if (*p != '"') |
136 |
badfmt(fmt); |
137 |
for (savep = ++p; *p != '"';) |
138 |
if (*p++ == 0) |
139 |
badfmt(fmt); |
140 |
if (!(tfu->fmt = malloc(p - savep + 1))) |
141 |
err(1, NULL); |
142 |
(void) strlcpy(tfu->fmt, savep, p - savep + 1); |
143 |
escape(tfu->fmt); |
144 |
p++; |
145 |
} |
146 |
} |
147 |
|
148 |
/*
 * Characters that may appear between '%' and the conversion character.
 * The '.' is deliberately first: callers scan with `spec + 1' when they
 * want to stop at the precision dot and with `spec' when they do not.
 */
static const char *spec =
	"."		/* precision introducer */
	"#-+ "		/* printf flags */
	"0123456789";	/* zero flag, field width and precision digits */
149 |
|
150 |
int |
151 |
size(FS *fs) |
152 |
{ |
153 |
FU *fu; |
154 |
int bcnt, cursize; |
155 |
unsigned char *fmt; |
156 |
int prec; |
157 |
|
158 |
/* figure out the data block size needed for each format unit */ |
159 |
for (cursize = 0, fu = fs->nextfu; fu; fu = fu->nextfu) { |
160 |
if (fu->bcnt) { |
161 |
cursize += fu->bcnt * fu->reps; |
162 |
continue; |
163 |
} |
164 |
for (bcnt = prec = 0, fmt = fu->fmt; *fmt; ++fmt) { |
165 |
if (*fmt != '%') |
166 |
continue; |
167 |
/* |
168 |
* skip any special chars -- save precision in |
169 |
* case it's a %s format. |
170 |
*/ |
171 |
while (strchr(spec + 1, *++fmt)); |
172 |
if (*fmt == '.' && isdigit(*++fmt)) { |
173 |
prec = atoi(fmt); |
174 |
while (isdigit(*++fmt)); |
175 |
} |
176 |
switch(*fmt) { |
177 |
case 'c': |
178 |
bcnt += 1; |
179 |
break; |
180 |
case 'd': case 'i': case 'o': case 'u': |
181 |
case 'x': case 'X': |
182 |
bcnt += 4; |
183 |
break; |
184 |
case 'e': case 'E': case 'f': case 'g': case 'G': |
185 |
bcnt += 8; |
186 |
break; |
187 |
case 's': |
188 |
bcnt += prec; |
189 |
break; |
190 |
case '_': |
191 |
switch(*++fmt) { |
192 |
case 'c': case 'p': case 'u': |
193 |
bcnt += 1; |
194 |
break; |
195 |
} |
196 |
} |
197 |
} |
198 |
cursize += bcnt * fu->reps; |
199 |
} |
200 |
return (cursize); |
201 |
} |
202 |
|
203 |
void |
204 |
rewrite(FS *fs) |
205 |
{ |
206 |
enum { NOTOKAY, USEBCNT, USEPREC } sokay; |
207 |
PR *pr, **nextpr; |
208 |
FU *fu; |
209 |
unsigned char *p1, *p2, *fmtp; |
210 |
char savech, cs[3]; |
211 |
int nconv, prec; |
212 |
size_t len; |
213 |
|
214 |
prec = 0; |
215 |
|
216 |
for (fu = fs->nextfu; fu; fu = fu->nextfu) { |
217 |
/* |
218 |
* Break each format unit into print units; each conversion |
219 |
* character gets its own. |
220 |
*/ |
221 |
nextpr = &fu->nextpr; |
222 |
for (nconv = 0, fmtp = fu->fmt; *fmtp; nextpr = &pr->nextpr) { |
223 |
if ((pr = calloc(1, sizeof(PR))) == NULL) |
224 |
err(1, NULL); |
225 |
*nextpr = pr; |
226 |
|
227 |
/* Skip preceding text and up to the next % sign. */ |
228 |
for (p1 = fmtp; *p1 && *p1 != '%'; ++p1); |
229 |
|
230 |
/* Only text in the string. */ |
231 |
if (!*p1) { |
232 |
pr->fmt = fmtp; |
233 |
pr->flags = F_TEXT; |
234 |
break; |
235 |
} |
236 |
|
237 |
/* |
238 |
* Get precision for %s -- if have a byte count, don't |
239 |
* need it. |
240 |
*/ |
241 |
if (fu->bcnt) { |
242 |
sokay = USEBCNT; |
243 |
/* Skip to conversion character. */ |
244 |
for (++p1; strchr(spec, *p1); ++p1); |
245 |
} else { |
246 |
/* Skip any special chars, field width. */ |
247 |
while (strchr(spec + 1, *++p1)); |
248 |
if (*p1 == '.' && isdigit(*++p1)) { |
249 |
sokay = USEPREC; |
250 |
prec = atoi(p1); |
251 |
while (isdigit(*++p1)); |
252 |
} else |
253 |
sokay = NOTOKAY; |
254 |
} |
255 |
|
256 |
p2 = *p1 ? p1 + 1 : p1; /* Set end pointer -- make sure |
257 |
* that it's non-NUL/-NULL first |
258 |
* though. */ |
259 |
cs[0] = *p1; /* Set conversion string. */ |
260 |
cs[1] = '\0'; |
261 |
|
262 |
/* |
263 |
* Figure out the byte count for each conversion; |
264 |
* rewrite the format as necessary, set up blank- |
265 |
* padding for end of data. |
266 |
*/ |
267 |
switch(cs[0]) { |
268 |
case 'c': |
269 |
pr->flags = F_CHAR; |
270 |
switch(fu->bcnt) { |
271 |
case 0: case 1: |
272 |
pr->bcnt = 1; |
273 |
break; |
274 |
default: |
275 |
p1[1] = '\0'; |
276 |
badcnt(p1); |
277 |
} |
278 |
break; |
279 |
case 'd': case 'i': |
280 |
pr->flags = F_INT; |
281 |
goto isint; |
282 |
case 'o': case 'u': case 'x': case 'X': |
283 |
pr->flags = F_UINT; |
284 |
isint: cs[2] = '\0'; |
285 |
cs[1] = cs[0]; |
286 |
cs[0] = 'q'; |
287 |
switch(fu->bcnt) { |
288 |
case 0: case 4: |
289 |
pr->bcnt = 4; |
290 |
break; |
291 |
case 1: |
292 |
pr->bcnt = 1; |
293 |
break; |
294 |
case 2: |
295 |
pr->bcnt = 2; |
296 |
break; |
297 |
default: |
298 |
p1[1] = '\0'; |
299 |
badcnt(p1); |
300 |
} |
301 |
break; |
302 |
case 'e': case 'E': case 'f': case 'g': case 'G': |
303 |
pr->flags = F_DBL; |
304 |
switch(fu->bcnt) { |
305 |
case 0: case 8: |
306 |
pr->bcnt = 8; |
307 |
break; |
308 |
case 4: |
309 |
pr->bcnt = 4; |
310 |
break; |
311 |
default: |
312 |
if (fu->bcnt == sizeof(long double)) { |
313 |
cs[2] = '\0'; |
314 |
cs[1] = cs[0]; |
315 |
cs[0] = 'L'; |
316 |
pr->bcnt = sizeof(long double); |
317 |
} else { |
318 |
p1[1] = '\0'; |
319 |
badcnt(p1); |
320 |
} |
321 |
} |
322 |
break; |
323 |
case 's': |
324 |
pr->flags = F_STR; |
325 |
switch(sokay) { |
326 |
case NOTOKAY: |
327 |
badsfmt(); |
328 |
case USEBCNT: |
329 |
pr->bcnt = fu->bcnt; |
330 |
break; |
331 |
case USEPREC: |
332 |
pr->bcnt = prec; |
333 |
break; |
334 |
} |
335 |
break; |
336 |
case '_': |
337 |
++p2; |
338 |
switch(p1[1]) { |
339 |
case 'A': |
340 |
endfu = fu; |
341 |
fu->flags |= F_IGNORE; |
342 |
/* FALLTHROUGH */ |
343 |
case 'a': |
344 |
pr->flags = F_ADDRESS; |
345 |
++p2; |
346 |
switch(p1[2]) { |
347 |
case 'd': case 'o': case'x': |
348 |
cs[0] = 'q'; |
349 |
cs[1] = p1[2]; |
350 |
cs[2] = '\0'; |
351 |
break; |
352 |
default: |
353 |
p1[3] = '\0'; |
354 |
badconv(p1); |
355 |
} |
356 |
break; |
357 |
case 'c': |
358 |
pr->flags = F_C; |
359 |
/* cs[0] = 'c'; set in conv_c */ |
360 |
goto isint2; |
361 |
case 'p': |
362 |
pr->flags = F_P; |
363 |
cs[0] = 'c'; |
364 |
goto isint2; |
365 |
case 'u': |
366 |
pr->flags = F_U; |
367 |
/* cs[0] = 'c'; set in conv_u */ |
368 |
isint2: switch(fu->bcnt) { |
369 |
case 0: case 1: |
370 |
pr->bcnt = 1; |
371 |
break; |
372 |
default: |
373 |
p1[2] = '\0'; |
374 |
badcnt(p1); |
375 |
} |
376 |
break; |
377 |
default: |
378 |
p1[2] = '\0'; |
379 |
badconv(p1); |
380 |
} |
381 |
break; |
382 |
default: |
383 |
p1[1] = '\0'; |
384 |
badconv(p1); |
385 |
} |
386 |
|
387 |
/* |
388 |
* Copy to PR format string, set conversion character |
389 |
* pointer, update original. |
390 |
*/ |
391 |
savech = *p2; |
392 |
p1[0] = '\0'; |
393 |
len = strlen(fmtp) + strlen(cs) + 1; |
394 |
if ((pr->fmt = calloc(1, len)) == NULL) |
395 |
err(1, NULL); |
396 |
snprintf(pr->fmt, len, "%s%s", fmtp, cs); |
397 |
*p2 = savech; |
398 |
pr->cchar = pr->fmt + (p1 - fmtp); |
399 |
fmtp = p2; |
400 |
|
401 |
/* Only one conversion character if byte count. */ |
402 |
if (!(pr->flags&F_ADDRESS) && fu->bcnt && nconv++) |
403 |
errx(1, "byte count with multiple conversion characters"); |
404 |
} |
405 |
/* |
406 |
* If format unit byte count not specified, figure it out |
407 |
* so can adjust rep count later. |
408 |
*/ |
409 |
if (!fu->bcnt) |
410 |
for (pr = fu->nextpr; pr; pr = pr->nextpr) |
411 |
fu->bcnt += pr->bcnt; |
412 |
} |
413 |
/* |
414 |
* If the format string interprets any data at all, and it's |
415 |
* not the same as the blocksize, and its last format unit |
416 |
* interprets any data at all, and has no iteration count, |
417 |
* repeat it as necessary. |
418 |
* |
419 |
* If, rep count is greater than 1, no trailing whitespace |
420 |
* gets output from the last iteration of the format unit. |
421 |
*/ |
422 |
for (fu = fs->nextfu; fu; fu = fu->nextfu) { |
423 |
if (!fu->nextfu && fs->bcnt < blocksize && |
424 |
!(fu->flags&F_SETREP) && fu->bcnt) |
425 |
fu->reps += (blocksize - fs->bcnt) / fu->bcnt; |
426 |
if (fu->reps > 1) { |
427 |
for (pr = fu->nextpr;; pr = pr->nextpr) |
428 |
if (!pr->nextpr) |
429 |
break; |
430 |
for (p1 = pr->fmt, p2 = NULL; *p1; ++p1) |
431 |
p2 = isspace(*p1) ? p1 : NULL; |
432 |
if (p2) |
433 |
pr->nospace = p2; |
434 |
} |
435 |
} |
436 |
#ifdef DEBUG |
437 |
for (fu = fs->nextfu; fu; fu = fu->nextfu) { |
438 |
(void)printf("fmt:"); |
439 |
for (pr = fu->nextpr; pr; pr = pr->nextpr) |
440 |
(void)printf(" {%s}", pr->fmt); |
441 |
(void)printf("\n"); |
442 |
} |
443 |
#endif |
444 |
} |
445 |
|
446 |
/*
 * escape --
 *	Translate backslash escape sequences in the NUL-terminated string
 *	`p1', in place.  \a \b \f \n \r \t \v become the corresponding
 *	control characters; a backslash before any other character yields
 *	that character; a lone backslash at the very end of the string is
 *	kept as a backslash.  The result is never longer than the input.
 */
void
escape(char *p1)
{
	char *out;	/* write position; never ahead of the read position */
	char c;

	out = p1;
	for (;;) {
		c = *p1++;
		if (c != '\\') {
			/* ordinary character, including the terminator */
			*out++ = c;
			if (c == '\0')
				return;
			continue;
		}

		/* alphabetic escape sequences have to be done in place */
		c = *p1++;
		switch (c) {
		case '\0':
			/* trailing backslash: keep it and terminate */
			*out++ = '\\';
			*out = '\0';
			return;
		case 'a':
			c = '\007';	/* i.e. '\a' */
			break;
		case 'b':
			c = '\b';
			break;
		case 'f':
			c = '\f';
			break;
		case 'n':
			c = '\n';
			break;
		case 'r':
			c = '\r';
			break;
		case 't':
			c = '\t';
			break;
		case 'v':
			c = '\v';
			break;
		default:
			/* unknown escape: the character stands for itself */
			break;
		}
		*out++ = c;
	}
}
493 |
|
494 |
/*
 * badcnt --
 *	Report that the byte count given for conversion `s' is not one the
 *	conversion supports, and exit with status 1.
 */
void
badcnt(char *s)
{

	errx(1, "%s: bad byte count", s);
}
499 |
|
500 |
/*
 * badsfmt --
 *	Report a %s conversion that has neither a precision nor a byte
 *	count, and exit with status 1.
 */
void
badsfmt(void)
{

	errx(1, "%%s: requires a precision or a byte count");
}
505 |
|
506 |
/*
 * badfmt --
 *	Report that the format string `fmt' could not be parsed, quoting it
 *	in the message, and exit with status 1.
 */
void
badfmt(const char *fmt)
{

	errx(1, "\"%s\": bad format", fmt);
}
511 |
|
512 |
/*
 * badconv --
 *	Report an unrecognized conversion; `ch' is the conversion text
 *	without its leading '%', which the message supplies.  Exits with
 *	status 1.
 */
void
badconv(char *ch)
{

	errx(1, "%%%s: bad conversion character", ch);
}