All pastes #1966514 Raw Edit

revised od.c program

public c v1 · immutable
#1966514 ·published 2010-10-19 03:47 UTC
rendered paste body
/* * This file and its contents are supplied under the terms of the * Common Development and Distribution License ("CDDL"), version 1.0. * You may only use this file in accordance with the terms version * 1.0 of the CDDL. * * A full copy of the text of the CDDL should have accompanied this * source.  A copy of the CDDL is also available via the Internet * http://www.illumos.org/license/CDDL. *//* * Copyright 2010 Nexenta Systems, Inc.  All rights reserved. *//* * od - octal dump.  Not really just octal anymore; read the POSIX * specification for it -- its more complex than you think! * * NB: We followed the POSIX semantics fairly strictly, where the * legacy code's behavior was in conflict.  In many cases the legacy * Solaris code was so completely broken as to be completely unusable. * (For example, the long double support was broken beyond * imagination!)   Note that GNU coreutils violates POSIX in a few * interesting ways, such as changing the numbering of the addresses * when skipping.  (Address starts should always be at 0, according * to the sample output in the Open Group man page.) */#include <stdio.h>#include <stdlib.h>#include <sys/types.h>#include <string.h>#include <err.h>#include <wchar.h>#include <locale.h>#include <unistd.h>#include <sys/stat.h>#define	_(x)	gettext(x)/* address format */static char *afmt  =	"%07o";static char *cfmt  =    "       ";static FILE *input = NULL;static int lcm = 1;static int blocksize = 16;static int numfiles = 0;static int curfile = 0;static char **files = NULL;static off_t limit = -1;/* * We need lookahead of an extra block to support multibyte chars.  We * also have a look behind so that we can avoid printing lines that * are identical to what we've already printed.  We'll use memalign to * make sure it is properly aligned. * * The block size is determined by the least common multiple of the * data items being displayed.  Usually it will be 16, but sometimes * it is 24 (when 12-byte long doubles are presented.) */typedef struct buffer {	char	*data;	int	prod;	int	cons;	int	mask;	int	size;	int	navail;		/* total bytes avail */} buffer_t;typedef struct output {	int		ncol;	void		(*func)(struct buffer *, int);	struct output	*next;} output_t;/* * Specifiers */typedef unsigned char		u8;typedef unsigned short		u16;typedef unsigned int		u32;typedef unsigned long long	u64;typedef char			s8;typedef short			s16;typedef int			s32;typedef long long		s64;typedef float			fF;typedef	double			fD;typedef long double		fL;static voidusage(void){	(void) fprintf(stderr, _("usage: od [-bcCdDfFoOsSvxX] "	    "[-t types ]... [-A base] [-j skip] [-N count] [file]...\n"));	exit(1);}#define	DECL_GET(typ)							\static typ								\get_ ## typ(buffer_t *b, int index)					\{									\	typ val = *(typ *)(void *)(b->data + index);			\	return (val);							\}DECL_GET(u8)DECL_GET(u16)DECL_GET(u32)DECL_GET(u64)DECL_GET(s8)DECL_GET(s16)DECL_GET(s32)DECL_GET(s64)DECL_GET(fF)DECL_GET(fD)DECL_GET(fL)#define	DECL_OUT(nm, typ, fmt)					\static void							\do_ ## nm(buffer_t *buf, int index)				\{								\	typ v = get_ ## typ(buf, index);			\	(void) printf(fmt, v);					\}								\								\static output_t output_ ## nm =  {				\	sizeof (typ), do_ ## nm					\};DECL_OUT(oct_b, u8, " %03o")DECL_OUT(oct_w, u16, " %06ho")DECL_OUT(oct_d, u32, " %011o")DECL_OUT(oct_q, u64, " %022llo")DECL_OUT(dec_b, u8, " %03u")DECL_OUT(dec_w, u16, " %05hu")DECL_OUT(dec_d, u32, " %010u")DECL_OUT(dec_q, u64, " %020llu")DECL_OUT(sig_b, s8, " %03d")DECL_OUT(sig_w, s16, " %6.05hd")DECL_OUT(sig_d, s32, " %11.010d")DECL_OUT(sig_q, s64, " %20.019lld")DECL_OUT(hex_b, u8, " %02x")DECL_OUT(hex_w, u16, " %04hx")DECL_OUT(hex_d, s32, " %08x")DECL_OUT(hex_q, s64, " %016llx")DECL_OUT(float, fF, " %14.7e")DECL_OUT(double, fD, " %21.14e")DECL_OUT(ldouble, fL, " %24.14Le")static char *ascii[] = {	"nul", "soh", "stx", "etx", "eot", "enq", "ack", " be",	" bs", " ht", " lf", " vt", " ff", " cr", " so", " si",	"dle", "dc1", "dc2", "dc3", "dc4", "nak", "syn", "etb",	"can", " em", "sub", "esc", " fs", " gs", " rs", " us",	" sp", "  !", "  \"", "  #", "  $", "  %", "  &", "  '",	"  (", "  )", "  *", "  +", "  ,", "  -", "  .", "  /",	"  0", "  1", "  2", "  3", "  4", "  5", "  6", "  7",	"  8", "  9", "  :", "  ;", "  <", "  =", "  >", "  ?",	"  @", "  A", "  B", "  C", "  D", "  E", "  F", "  G",	"  H", "  I", "  J", "  K", "  L", "  M", "  N", "  O",	"  P", "  Q", "  R", "  S", "  T", "  U", "  V", "  W",	"  X", "  Y", "  Z", "  [", "  \\", "  ]", "  ^", "  _",	"  `", "  a", "  b", "  c", "  d", "  e", "  f", "  g",	"  h", "  i", "  j", "  k", "  l", "  m", "  n", "  o",	"  p", "  q", "  r", "  s", "  t", "  u", "  v", "  w",	"  x", "  y", "  z", "  {", "  |", "  }", "  ~", "del"};static voiddo_ascii(buffer_t *buf, int index){	uint8_t v = get_u8(buf, index);	(void) fputc(' ', stdout);	(void) fputs(ascii[v & 0x7f], stdout);}static output_t output_ascii = {	1, do_ascii,};static voiddo_char(buffer_t *buf, int index){	static int	nresid = 0;	static int	printable = 0;	int		cnt;	int		avail;	int		nb;	char		scratch[10];	wchar_t		wc;	int		which;	uint8_t v = get_u8(buf, index);	/*	 * If there were residual bytes from an earlier	 * character, then just display the ** continuation	 * indication.	 */	if (nresid) {		if (printable) {			(void) fputs("  **", stdout);		} else {			(void) printf(" %03o", v);		}		nresid--;		return;	}	/*	 * Peek ahead up to MB_CUR_MAX characters.  This has to be	 * done carefully because we might need to look into the next	 * block to really know for sure.	 */	scratch[0] = v;	avail = buf->navail;	if (avail > MB_CUR_MAX)		avail = MB_CUR_MAX;	for (cnt = 1, which = index + 1; cnt < avail; cnt++, which++) {		scratch[cnt] = buf->data[which & buf->mask];	}	/* now see if the value is a real character */	nresid = 0;	wc = 0;	nb = mbtowc(&wc, scratch, avail);	if (nb < 0) {		(void) printf(" %03o", v);		return;	}	if (nb == 0) {		(void) fputs("  \\0", stdout);		return;	}	nresid = nb - 1;	if (nb && iswprint(wc)) {		scratch[nb] = 0;		(void) fputs("   ", stdout);		(void) fputs(scratch, stdout);		printable = 1;		return;	}	printable = 0;	if (wc == 0) {		(void) fputs("  \\0", stdout);	} else if (wc == '\b') {		(void) fputs("  \\b", stdout);	} else if (wc == '\f') {		(void) fputs("  \\f", stdout);	} else if (wc == '\n') {		(void) fputs("  \\n", stdout);	} else if (wc == '\r') {		(void) fputs("  \\r", stdout);	} else if (wc == '\t') {		(void) fputs("  \\t", stdout);	} else {		(void) printf(" %03o", v);	}}static output_t output_char = {	1, do_char,};/* * List of output formatting structures. */static output_t *head = NULL;static output_t **tailp = &head;static voidadd_out(output_t *src){	output_t	*out;	int		m;	if ((out = calloc(1, sizeof (*src))) == NULL) {		err(1, "malloc");	}	m = lcm;	while ((m % src->ncol) != 0) {		m += lcm;	}	lcm = m;	blocksize = lcm;	while (blocksize < 16)		blocksize *= 2;	(void) memcpy(out, src, sizeof (*src));	*tailp = out;	tailp = &out->next;}static FILE *next_input(void){	for (;;) {		if (curfile >= numfiles)			return (NULL);		if (input) {			if ((input = freopen(files[curfile], "r", input)) !=			    NULL) {				curfile++;				return (input);			}		} else {			if ((input = fopen(files[curfile], "r")) != NULL) {				curfile++;				return (input);			}		}		warn("open: %s", files[curfile]);		curfile++;	}}static voidrefill(buffer_t *b){	int	n;	int	want;	int	zero;	/*	 * If we have 2 blocks bytes available, we're done.  Note that each	 * iteration usually loads up 16 bytes, unless we run out of	 * data.	 */	while (input && (b->navail < (2 * blocksize))) {		/* we preload the next one in advance */		if (limit == 0) {			(void) fclose(input);			input = NULL;			continue;		}		/* we want to read a whole block if possible */		want = blocksize;		if ((limit >= 0) && (want > limit)) {			want = limit;		}		zero = blocksize;		while (want && input) {			int	c;			b->prod &= b->mask;			c = (b->prod + want > (b->mask + 1)) ?			    b->mask - b->prod :			    want;			n = fread(b->data + b->prod, 1, c, input);			if (n < 0) {				warn("read: %s",				    files ? files[curfile-1] : "stdin");				input = next_input();				continue;			}			if (n == 0) {				input = next_input();				continue;			}			if (limit >= 0)				limit -= n;			b->navail += n;			b->prod += n;			want -= n;			zero -= n;		}		while (zero) {			b->data[b->prod & b->mask] = 0;			b->prod++;			b->prod &= b->mask;			zero--;		}	}}#define	STR1	"C1"#define	STR2	"S2"#ifdef	_LP64#define	STR8	"L8"#define	STR4	"I4"#else#define	STR8	"8"#define	STR4	"IL4"#endifstatic voiddo_type_string(char *typestr){	if (*typestr == 0) {		errx(1, _("missing type string"));	}	while (*typestr) {		switch (*typestr) {		case 'a':			typestr++;			add_out(&output_ascii);			break;		case 'c':			add_out(&output_char);			typestr++;			break;		case 'f':			typestr++;			switch (*typestr) {			case 'F':			case '4':				add_out(&output_float);				typestr++;				break;			case '8':			case 'D':				add_out(&output_double);				typestr++;				break;			case 'L':				add_out(&output_ldouble);				typestr++;				break;			default:				add_out(&output_float);				break;			}			break;		case 'd':			typestr++;			if (strchr(STR1, *typestr)) {				typestr++;				add_out(&output_sig_b);			} else if (strchr(STR2, *typestr)) {				typestr++;				add_out(&output_sig_w);			} else if (strchr(STR4, *typestr)) {				typestr++;				add_out(&output_sig_d);			} else if (strchr(STR8, *typestr)) {				typestr++;				add_out(&output_sig_q);			} else {				add_out(&output_sig_d);			}			break;		case 'u':			typestr++;			if (strchr(STR1, *typestr)) {				typestr++;				add_out(&output_dec_b);			} else if (strchr(STR2, *typestr)) {				typestr++;				add_out(&output_dec_w);			} else if (strchr(STR4, *typestr)) {				typestr++;				add_out(&output_dec_d);			} else if (strchr(STR8, *typestr)) {				typestr++;				add_out(&output_dec_q);			} else {				add_out(&output_dec_d);			}			break;		case 'o':			typestr++;			if (strchr(STR1, *typestr)) {				typestr++;				add_out(&output_oct_b);			} else if (strchr(STR2, *typestr)) {				typestr++;				add_out(&output_oct_w);			} else if (strchr(STR4, *typestr)) {				typestr++;				add_out(&output_oct_d);			} else if (strchr(STR8, *typestr)) {				typestr++;				add_out(&output_oct_q);			} else {				add_out(&output_oct_d);			}			break;		case 'x':			typestr++;			if (strchr(STR1, *typestr)) {				typestr++;				add_out(&output_hex_b);			} else if (strchr(STR2, *typestr)) {				typestr++;				add_out(&output_hex_w);			} else if (strchr(STR4, *typestr)) {				typestr++;				add_out(&output_hex_d);			} else if (strchr(STR8, *typestr)) {				typestr++;				add_out(&output_hex_q);			} else {				add_out(&output_hex_d);			}			break;		default:			errx(1, _("unrecognized type string character: %c"),			    *typestr);			exit(1);		}	}}intmain(int argc, char **argv){	int		c;	int		i;	buffer_t	buffer;	int		first = 1;	int		doall = 0;	int		same = 0;	off_t		offset = 0;	off_t		skip = 0;	char		*eptr;	input = stdin;	(void) setlocale(LC_ALL, "");	while ((c = getopt(argc, argv, "A:bCcdDfFj:N:oOsSxXvt:")) != EOF) {		switch (c) {		case 'A':			if (strlen(optarg) > 1) {				afmt = NULL;			}			switch (*optarg) {			case 'o':				afmt = "%07llo";				cfmt = "       ";				break;			case 'd':				afmt = "%07lld";				cfmt = "       ";				break;			case 'x':				afmt = "%07llx";				cfmt = "       ";				break;			case 'n':				/*				 * You could argue that the code should				 * use the same 7 spaces.  Legacy uses 8				 * though.  Oh well.  Better to avoid				 * gratuitous change.				 */				afmt = "        ";				cfmt = "        ";				break;			default:				afmt = NULL;				break;			}			if (strlen(optarg) != 1) {				afmt = NULL;			}			if (afmt == NULL)				warnx(_("invalid address base, "				    "must be o, d, x, or n"));			break;		case 'b':			add_out(&output_oct_b);			break;		case 'c':		case 'C':			add_out(&output_char);			break;		case 'f':			add_out(&output_float);			break;		case 'F':			add_out(&output_double);			break;		case 'd':			add_out(&output_dec_w);			break;		case 'D':			add_out(&output_dec_d);			break;		case 't':			do_type_string(optarg);			break;		case 'o':			add_out(&output_oct_w);			break;		case 'O':			add_out(&output_oct_d);			break;		case 's':			add_out(&output_sig_w);			break;		case 'S':			add_out(&output_sig_d);			break;		case 'x':			add_out(&output_hex_w);			break;		case 'X':			add_out(&output_hex_d);			break;		case 'v':			doall++;			break;		case 'j':			skip = strtoll(optarg, &eptr, 0);			if (*eptr == 'b') {				skip <<= 9;	/* 512 bytes */				eptr++;			} else if (*eptr == 'k') {				skip <<= 10;	/* 1k */				eptr++;			} else if (*eptr == 'm') {				skip <<= 20;	/* 1m */				eptr++;			} else if (*eptr == 'g') {				skip <<= 30;	/* 1g */				eptr++;			}			if ((skip < 0) || (eptr[0] != 0)) {				warnx(_("invalid skip count '%s' specified"),				    optarg);				exit(1);			}			break;		case 'N':			limit = strtoll(optarg, &eptr, 0);			/*			 * POSIX doesn't specify this, but I think these			 * may be helpful.			 */			if (*eptr == 'b') {				limit <<= 9;				eptr++;			} else if (*eptr == 'k') {				limit <<= 10;				eptr++;			} else if (*eptr == 'm') {				limit <<= 20;				eptr++;			} else if (*eptr == 'g') {				limit <<= 30;				eptr++;			}			if ((limit < 0) || (eptr[0] != 0)) {				warnx(_("invalid byte count '%s' specified"),				    optarg);				exit(1);			}			break;		default:			usage();			break;		}	}	/* this finds the smallest power of two size we can use */	buffer.size = (1 << (ffs(blocksize * 3) + 1));	buffer.mask = buffer.size - 1;	buffer.data = memalign(16, buffer.size);	if (buffer.data == NULL) {		err(1, "memalign");	}	/*	 * Allocate an array for all the input files.	 */	if (argc > optind) {		files = calloc(sizeof (char *), argc - optind);		for (i = 0; i < argc - optind; i++) {			files[i] = argv[optind + i];			numfiles++;		}		input = next_input();	} else {		input = stdin;	}	/*	 * We need to seek ahead.  fseek would be faster.	 */	while (skip && input) {		struct stat sbuf;		/*		 * Only fseek() on regular files.  (Others		 * we have to read().		 */		if (fstat(fileno(input), &sbuf) < 0) {			warn("fstat: %s", files[curfile-1]);			input = next_input();			continue;		}		if (S_ISREG(sbuf.st_mode)) {			/*			 * No point in seeking a file that is too			 * short to begin with.			 */			if (sbuf.st_size < skip) {				skip -= sbuf.st_size;				input = next_input();				continue;			}			if (fseeko(input, skip, SEEK_SET) < 0) {				err(1, "fseek:%s", files[curfile-1]);			}			/* Done seeking. */			skip = 0;			break;		}		/*		 * fgetc seems like it would be slow, but it uses		 * buffered I/O, so it should be fast enough.		 */		while (skip) {			if (fgetc(input) == EOF) {				if (ferror(input)) {					warn("read: %s", files[curfile-1]);				}				input = next_input();				break;			}			skip--;		}	}	if (head == NULL) {		add_out(&output_oct_w);	}	buffer.navail = 0;	buffer.prod = 0;	buffer.cons = 0;	for (refill(&buffer); buffer.navail > 0; refill(&buffer)) {		output_t *out;		int	mx;		int	j, k;		/*		 * If this buffer was the same as last, then just		 * dump an asterisk.		 */		if ((!first) && (buffer.navail >= blocksize) && (!doall)) {			j = buffer.cons;			k = j - blocksize;			for (i = 0; i < blocksize; i++) {				if (buffer.data[j & buffer.mask] !=				    buffer.data[k & buffer.mask]) {					break;				}				j++;				k++;			}			if (i == blocksize) {				if (!same) {					(void) fputs("*\n", stdout);					same = 1;				}				buffer.navail -= blocksize;				offset += blocksize;				buffer.cons += blocksize;				buffer.cons &= buffer.mask;				continue;			}		}		first = 0;		same = 0;		mx = (buffer.navail > blocksize) ? blocksize : buffer.navail;		for (out = head; out != NULL; out = out->next) {			if (out == head) {				(void) printf(afmt, offset);			} else {				(void) fputs(cfmt, stdout);			}			for (i = 0, j = buffer.cons; i < mx; i += out->ncol) {				out->func(&buffer, j);				j += out->ncol;				j &= buffer.mask;			}			(void) fputs("\n", stdout);		}		buffer.cons += mx;		buffer.cons &= buffer.mask;		offset += mx;		buffer.navail -= mx;	}	(void) printf(afmt, offset);	(void) fputs("\n", stdout);	return (0);}