// Version 0.44 /* * This is dvii, a freely redistributable TeX dvi information tool. * It is (C) Copyright 1999, 2000, 2001, 2002 by Adam H. Lewenberg. * You may modify and use this program as long as you send modifications * to Adam Lewenberg. It can be included in any distribution, * commercial or otherwise, so long as the banner string defined below is * not modified (except for the version number) and the banner string below * is printed on program invocation, or can be printed on program invocation * with the -? option. (Above language adapted from T. Rokicki's dvips.) * * For a history of changes, search below for HISTORY. * * For a list of acknowledgements, search below for ACKNOWLEDGEMENTS. */ #define BANNER "This is dvii 0.44 (DVI file information) by Adam Lewenberg" /* #undef NDEBUG turns ON debugging, while #define'ing it turns debugging OFF. */ #define NDEBUG #ifdef __BORLANDC__ #if sizeof(int) < 2 #error Size of int should be at least two bytes! #endif #if sizeof(long) < 4 #error Size of long should be at least four bytes! #endif #endif /* There can be problems if dvii.c is compiled on a 64-bit machine: in that case int will (probably) be 4 bytes and long 8 bytes. The read_*_byte routines will fail in this case when reading negative parameters. We handle this by adding a compile-time option SIXTY_FOUR_BIT that indicates we are on a 64-bit machine. When you compile on such a machine, add the option -DSIXTY_FOUR_BIT. On all machines, no matter the word length, we want the following to be true: S4 will be a signed data type that is 4 bytes long. U4 will be an unsigned data type that is 4 bytes long. S2 will be a signed data type that is 2 bytes long. U2 will be an unsigned data type that is 2 bytes long. Thanks to Tom Kacvinsky for reporting this and suggesting the fix. */ #ifdef SIXTY_FOUR_BIT typedef int S4 ; typedef unsigned int U4 ; typedef short int S2 ; typedef unsigned short int U2 ; #else typedef long S4 ; typedef unsigned long U4 ; typedef short int S2 ; typedef unsigned short int U2 ; #endif /* -- Includes -- */ #include #include #include #include #include /* In case someone has a non-standard stdio.h */ /* Suggested by Karsten Tinnefeld */ #ifndef SEEK_SET #define SEEK_SET 0 #endif #ifndef SEEK_CUR #define SEEK_CUR 1 #endif #ifndef SEEK_END #define SEEK_END 2 #endif #include #ifdef NAME_MAX #define FNL_MAX NAME_MAX /* Maximum length of a filename */ #else #define FNL_MAX 128 /* Maximum length of a filename */ #endif #define VERSION "0.44 (02 May 2002)" #define MESSAGE_DIGEST_VERSION "simple sum" #define EMAIL "adam@macrotex.net" #define SPMEMMAX 128 /* Default number of bytes of \special text to display */ #define ABORT_MSG " Exiting." #define FALSE 0 #define TRUE 1 /* Because some older systems do not have full ANSI definitions, we check if CLOCKS_PER_SEC is defined. If not, we simply disable the timer code. */ #include #ifdef CLOCKS_PER_SEC #define ENABLE_TIMING #endif /* Needed dvi file op codes. */ /* Zero byte opcodes. */ #define OC_SC0 0 #define OC_SC1 1 #define OC_SC127 127 #define OC_NOP 138 #define OC_EOP 140 #define OC_PUSH 141 #define OC_POP 142 #define OC_W0 147 #define OC_X0 152 #define OC_Y0 161 #define OC_Z0 166 #define OC_FNTN0 171 #define OC_FNTN1 172 #define OC_FNTN127 234 #define OC_POSTPOST 249 /* One byte opcodes. */ #define OC_SC128 128 #define OC_PUT1 133 #define OC_RGHT1 143 #define OC_W1 148 #define OC_X1 153 #define OC_DOWN1 157 #define OC_Y1 162 #define OC_Z1 167 #define OC_FNT1 235 /* Two byte opcodes. */ #define OC_SC129 129 #define OC_PUT2 134 #define OC_RGHT2 144 #define OC_W2 149 #define OC_X2 154 #define OC_DOWN2 158 #define OC_Y2 163 #define OC_Z2 168 #define OC_FNT2 236 /* Three byte opcodes. */ #define OC_SC130 130 #define OC_PUT3 135 #define OC_RGHT3 145 #define OC_W3 150 #define OC_X3 155 #define OC_DOWN3 159 #define OC_Y3 164 #define OC_Z3 169 #define OC_FNT3 237 /* Four byte opcodes. */ #define OC_SC131 131 #define OC_PUT4 136 #define OC_RGHT4 146 #define OC_W4 151 #define OC_X4 156 #define OC_DOWN4 160 #define OC_Y4 165 #define OC_Z4 170 #define OC_FNT4 238 /* Eight byte opcodes. */ #define OC_SR 132 #define OC_PR 137 /* 44 bytes */ #define OC_BOP 139 /* Variable-length byte opcodes. */ #define OC_XXX1 239 #define OC_XXX2 240 #define OC_XXX3 241 #define OC_XXX4 242 #define OC_FD1 243 #define OC_FD2 244 #define OC_FD3 245 #define OC_FD4 246 #define OC_PRE 247 #define OC_POST 248 /* Note that according to DVI spec, OC_POST is a 0-byte opcode, but for our purposes it _acts_ like a variable-length opcode. */ /* Control character positions. */ #define CHAR_CCSTART 0 #define CHAR_CCEND 32 #define CHAR_CCXX1 127 /* This structure stores the TeX page and the file offset of the bop opcode that marks the start of that page. */ struct page_list { int TeX_page ; S4 offset ; } ; typedef struct page_list pageinfo ; /* This structure stores the font information for the Font Hash Table. */ struct font_entry { int font_id ; char *name ; int scale ; int used ; struct font_entry *next ; } ; typedef struct font_entry FONTELEMENT ; typedef FONTELEMENT *FONTLINK ; /* The postamble information structure. */ struct p_info { int page_count ; int number_of_fonts ; } ; typedef struct p_info post_info ; /* Function prototypes. */ void type_check(void) ; int valid_dvi_file(FILE * dvifile, int level, int dump_op_codes) ; S4 postamble_offset(FILE * dvi_file) ; void get_fonts(FILE * dvi_file, char * DVI_file_font_list) ; void print_page_list(pageinfo * pagelist, FILE * dvifile, int do_checksum, int md_flags) ; //void print_page_list(pageinfo * pagelist, FILE * dvifile) ; void backspace(FILE * dvifile) ; S4 parse_pages(FILE * dvi_file, pageinfo * pages, int num_pages) ; int parse_specials(FILE * dvifile, S4 pageone_offset, int show_specials) ; inline int param_length3(S2 opcode) ; int arg_length(FILE * dvifile) ; int read_pre_opcode(FILE * dvifile, char * comment_string) ; int read_post_opcode(FILE * dvifile, post_info * p, int show_fonts_flag) ; U4 read_special_opcode(FILE * dvifile, int opcode, int show_specials) ; int read_fontdef_opcode(FILE * dvifile, int opcode) ; int read_fnt_def_in_post(FILE * dvi_file, int show_fonts_flag) ; int clean_chars(char * array_of_chars, int length) ; S2 read_next_opcode(FILE * dvifile, int show_specials, int track_pages) ; void print_use_string(void) ; void print_options_string(void) ; int read_one_byte(S2 * one_byte, FILE * dvifile) ; int read_two_bytes(S2 * two_bytes, FILE * dvifile) ; int read_three_bytes(S4 * three_bytes, FILE * dvifile) ; int read_four_bytes(S4 * four_bytes, FILE * dvifile) ; S4 S4_min(S4 a, S4 b) ; unsigned int closest_power_of_2(unsigned int n) ; double floor(double x) ; // What to ignore in message digest functions. #define MD_NOTHING 0 // Ignore nothing, i.e., accept everything #define MD_FONTS 1 // Ignore all fonts and font assignments #define MD_NOPS 2 // Ignore NOPS #define MD_SPECIALS 4 // Ignore specials #define MD_STACK 8 // Ignore push'es and pop's #define MD_MAXFLAG 15 // Sum of the above void message_digest1(S4 file_offset_start, S4 file_offset_end, FILE * dvifile, char * checksum) ; void message_digest2(S4 file_offset_start, S4 file_offset_end, FILE * dvifile, char * checksum, int md_flags) ; // Hash functions FONTLINK font_hash (int key) ; int add_font_hash (int key, char * font_string, int scale) ; void print_font_hash_table () ; void print_used_fonts () ; /* Timer functions. */ void start_time(void) ; double prn_time(void) ; /* Debugging functions. */ #ifdef NDEBUG #define F_TELL(s) #else void F_TELL(FILE * f) ; #endif /* GLOBAL variables. */ int CurrentPhysicalPage ; S4 CurrentTeXPage ; S4 CurrentFontNumber ; int Special_Text_Length = SPMEMMAX ; S4 PostambleOffset ; int Number_Of_Pages ; int Number_Of_Fonts ; int FontHashTableSize ; // The length of our font hash table. FONTLINK FontHashTable ; //The head of our FontHashTable. int NumberOfCollisions = 0 ; //Just for fun. /* GLOBAL options. These are global options that can be set on the command line, or by another function. */ int SHOW_CONTROLCHARS ; /* Show any control characters encountered. */ int opt_CURFONT ; /* Keep track of the current font number. */ int opt_REMOVECC = FALSE ; /* Remove control codes from special text when printing to screen. */ int opt_CHECKSUM = 0 ; int opt_SHOWFONTSONEACHPAGE = 0 ; int opt_SHOWPHYSICALPAGES = 1 ; // We normally show physical pages. int verbose ; /* Give verbose output (for debugging purposes). */ #define DOTDVISTRING ".dvi" /* *************************************************************** */ int main(int argc, char *argv[]) { char filename[FNL_MAX] ; /* Maximum length of input file name. */ FILE * dvifile ; char comment_string[256] ; /* Maximum length of comment string. */ char next_char ; int ret_value ; long DVI_file_length ; long DVI_file_length_K ; int i = 0 ; struct stat stat_buf ; pageinfo * pages = (pageinfo *) NULL ; S4 page_one_offset ; post_info postamble_info ; int opt_FLAG = 0 ; int opt_SPECIALS = 0 ; int opt_COUNTSPECIALS = 0 ; int opt_FONTS = 0 ; int opt_PAGES = 0 ; int opt_SUMMARY = 0 ; int opt_VALIDITY = 0 ; // Report validity on output. int opt_MOREVALIDITY = 0 ; int opt_DUMPOPCODES = 0 ; int opt_MDFLAGS = 0 ; #ifdef ENABLE_TIMING int opt_TIMER = 0 ; #endif int valid_level = 0 ; verbose = 0 ; /* 0. Do some type checking. */ type_check() ; /* 1. Parse command line. */ filename[0] = '\0' ; /* If there are not enough arguments, print the usage information. */ if (argc <= 1) { print_use_string() ; exit (0) ; } /* The first argument that is NOT preceded with a '-' is assumed to be the filename and further command line parsing is terminated. */ for (i=1; i= '0') && (next_char <= '9')) || (next_char == '-')) { // Read number Special_Text_Length = atoi(argv[i]) ; opt_SPECIALS = 1 ; } else { printf("The -n option must be followed by an integer" " (no spaces)." ABORT_MSG "\n"); exit (0) ; } break ; /* -m do checksum (message digest) for each page. Force opt_PAGES = 1 ; */ case 'm': opt_CHECKSUM = 1 ; opt_PAGES = 1 ; opt_FLAG = 1 ; break ; case 'M': // Do message digest but ignore some of the op_codes. // An integer had better follow n, else error. opt_CHECKSUM = 1 ; opt_PAGES = 1 ; opt_FLAG = 1 ; // Get integer following '-M' next_char = *(++argv[i]) ; if ( ((next_char >= '0') && (next_char <= '9')) || (next_char == '-')) { // Read number opt_MDFLAGS = atoi(argv[i]) ; } else { printf("The -M option must be followed by an integer" " (no spaces)." ABORT_MSG "\n"); exit (0) ; } if (opt_MDFLAGS > MD_MAXFLAG) { printf("The maximum number allowed after -M is %d.", MD_MAXFLAG) ; printf(ABORT_MSG "\n"); exit (0) ; } break ; case 'S': // Implies a summary opt_COUNTSPECIALS = 1 ; opt_SUMMARY = 1 ; opt_FLAG = 1 ; break ; case 'P': // suppress the display of physical pages opt_SHOWPHYSICALPAGES = 0 ; break ; default: printf("Unrecognized command line option '%s'.\n", argv[i]) ; print_use_string() ; exit (0) ; break ; } } else { /* This must be the filename, so grab it and stop parsing command line. */ (void) strcpy(filename, argv[i]) ; break ; } } /* If filename is empty we must not have found a filename, so abort. */ if (filename[0] == '\0') { printf("No file specified." ABORT_MSG "\n") ; exit (0) ; } /* If no command line options set, set many things to 1. */ if (opt_FLAG != 1 ) { opt_SPECIALS = 1 ; opt_FONTS = 1 ; opt_PAGES = 1 ; opt_SUMMARY = 1 ; } #ifdef ENABLE_TIMING if (opt_TIMER) start_time() ; #endif /* 2. Open file for read-only. Note that on DOS/Windows machines the 'b' is relevant and necessary. */ dvifile = fopen(filename, "rb") ; if (dvifile == (FILE *) NULL) /* Try appending '.dvi. */ { /* Append '.dvi'. */ (void) strcat(filename, DOTDVISTRING) ; dvifile = fopen(filename, "rb") ; } if (dvifile == (FILE *) NULL) { printf("Cannot find file %s or %s.dvi." ABORT_MSG "\n", argv[i], argv[i]) ; exit (0) ; } /* 3. VALIDITY CHECK. Always check, but report only if opt_VALIDITY = 1 ; */ ret_value = valid_dvi_file(dvifile, valid_level, opt_DUMPOPCODES) ; if (ret_value == 0) { if (opt_VALIDITY) { printf("dvi file '%s' passed validation check (level %d).\n", filename, valid_level) ; } } else { printf("File %s does not appear to be a valid dvi file.\n", filename) ; exit (0) ; } /* 4. FILE STATISTICS */ if (opt_SUMMARY) { /* Get file length (in bytes). */ stat(filename, &stat_buf); DVI_file_length = (long) stat_buf.st_size ; DVI_file_length_K = (long) (DVI_file_length / 1024) ; if ((DVI_file_length_K == (long)0) && (DVI_file_length > (long)0)) DVI_file_length_K = 1 ; printf("File size: %ld bytes (%ld K)\n", DVI_file_length, DVI_file_length_K) ; } /* 5. PARSE PREAMBLE. */ if (opt_SUMMARY) { (void) read_pre_opcode(dvifile, comment_string) ; printf("Comment string: %s\n", comment_string) ; } /* 6. PARSE POSTAMBLE. */ /* 5a. Get the postamble offset. */ PostambleOffset = postamble_offset(dvifile) ; fseek(dvifile, PostambleOffset, SEEK_SET) ; (void) read_post_opcode(dvifile, &postamble_info, FALSE) ; Number_Of_Pages = postamble_info.page_count ; Number_Of_Fonts = postamble_info.number_of_fonts ; if (opt_SHOWFONTSONEACHPAGE) { /* Set the size of the font hash table; we want it to be the smallest power of 2 >= max(number of fonts, 128). */ if (Number_Of_Fonts < 129) { FontHashTableSize = 128 ; } else { /* Get the closest power of 2 to Number_Of_Fonts (i.e., 2^(floor(log_2(Number_Of_Fonts))) without using the log function explicitly. */ FontHashTableSize = closest_power_of_2((unsigned int) Number_Of_Fonts) ; } #ifndef NDEBUG printf ("Allocating %d slots for the FontHashTable\n", FontHashTableSize ) ; #endif // Allocate the memory. FontHashTable = calloc(FontHashTableSize, sizeof(FONTELEMENT)) ; // Fill array with NULLS? for (i=0; i font '#' with 'name' scaled at 'mag'" "\n" "'p:[real page/TeX page]' -> 'real page' has \\count0 = 'TeX page'" "\n" "'s:[real page/TeX page]::text' -> special on 'real page'/'TeX page' with" "\n" " initial text of 'text'" "\n" ; const char options[] = "All the command line options:" "\n" " -c : perform simple validity check" "\n" " -C : perform more rigorous (and slower) validity check" "\n" " -d : dump opcodes (implies -C)" "\n" " -f : display fonts" "\n" " -F : display fonts on each page" "\n" " -g : suppress control characters when showing special text" "\n" " -h : show main help screen" "\n" " -H : show options help screen (this screen)" "\n" " -m : add message digest information when displaying pages" "\n" " -M#: same as -m except ignore some of the operators" "\n" " -M0=same as -m; -M1=ignore font information; -M2=ignore NOPS;" "\n" " -M4=ignore specials; -M8=ignore pushes and pops; " "\n" " Add to combine (e.g., -M5 ignores fonts and specials)" "\n" " -n#: display # bytes of special text; if # is -1, show all text" "\n" " -p : display pages" "\n" " -P : suppress the display of physical pages" "\n" " -s : display specials" "\n" " -S : show number of specials in summary" "\n" " -T : give timing information" "\n" " -u : display summary" "\n" " -v : verbose mode" "\n" ; void print_use_string() { printf("%s", usage) ; } void print_options_string() { printf("%s", options) ; } /* *************************************************************** */ // FUNCTION type_check /* Do some checking on the size of int and long. On 32-bit machines, check that int is (exactly) 2 bytes and that long is (exactly) 4 bytes. On 64-bit machines, check that int is (exactly) 4 bytes and that short int is (exactly) 2 bytes. */ #define TC_ERROR "FATAL type check error: " void type_check() { if (sizeof(U4) != 4) { printf(TC_ERROR) ; printf("sizeof(U4)=%d, but I was expecting 4.\n", (int) sizeof(U4)) ; if (sizeof(U4) > 4) { printf("Try re-compiling with -DSIXTY_FOUR_BIT\n") ; } exit (-1) ; } if (sizeof(U2) != 2) { printf(TC_ERROR) ; printf("sizeof(U2)=%d, but I was expecting 2.\n", (int) sizeof(U2)) ; exit (-1) ; } return ; } /* *************************************************************** */ // FUNCTION valid_dvi_file #define DVI_VALID_ERROR "[dvi validation error] " #define DVICHECK_OK " dvi validation check: " /* Verify that this is a valid dvi file. level is one of: 0 : fast, shallow check (check only pre- and postamble) 1 : slower, more careful check (do above plus parse each opcode) */ int valid_dvi_file(FILE * dvifile, int level, int dump_op_codes) { S2 one_byte ; S2 opcode ; // S4 q ; // S4 last_pagepointer_read ; // S4 ppointer ; int found_post_id, twotwothree ; /* 1. The first two bytes should be 247 2 (octal 367 002) (see section 15 of dvitype.web). */ rewind(dvifile) ; read_one_byte(&one_byte, dvifile) ; if (one_byte != 247) { printf(DVI_VALID_ERROR) ; printf("first byte read is %d but should be 247 (octal 367).\n", one_byte) ; exit (-1) ; } else { if (verbose) { printf(DVICHECK_OK) ; printf("first byte is 247\n") ; } } read_one_byte(&one_byte, dvifile) ; if (one_byte != (S2) 2) { printf(DVI_VALID_ERROR) ; printf("second byte should be 2.\n") ; exit (-1) ; } else { if (verbose) { printf(DVICHECK_OK) ; printf("second byte is 2\n") ; } } /* 2. Go to the postamble. */ fseek (dvifile, 0, SEEK_END) ; /* Read 233's from end of file. There need to be at least 4. */ found_post_id = 0 ; twotwothree = 0 ; /* Number of 233's found. */ while (found_post_id == 0) { fseek (dvifile, -1, SEEK_CUR) ; read_one_byte(&one_byte, dvifile) ; fseek (dvifile, -1, SEEK_CUR) ; if (one_byte == (S2) 223) { ++twotwothree ; if (verbose) { printf(DVICHECK_OK) ; printf("read a 223 at end of file\n") ; } } else { if ((unsigned int) one_byte == 2) { found_post_id = 1 ; if (verbose) { printf(DVICHECK_OK) ; printf("found post opcode\n") ; } } else { printf(DVI_VALID_ERROR) ; printf("missing postamble id (should be 2)\n") ; exit(-1) ; } } } /* Check that there were at least four 233's. */ if (twotwothree < 4) { printf(DVI_VALID_ERROR) ; printf("The last 4 bytes should be 233; found only %d.\n", twotwothree) ; exit (-1) ; } else { if (verbose) { printf(DVICHECK_OK) ; printf("found at least 4 bytes of 233's\n") ; } } /* 3. If we are doing a level 1 validity check, go to the first page and keep reading opcodes until the postamble is reached. But first we have to _get_ to the first page. */ if (level > 0) { // Go to where the first opcode (pre) should be. rewind(dvifile) ; /* Read opcodes until we get to the end of the file. */ opcode = (S2) 0 ; do { opcode = read_next_opcode(dvifile, 0, 0) ; // If we are dumping opcodes, dump this one. if (dump_op_codes) { printf("o:%d\n" , opcode) ; } } while ((opcode >= (S2) 0) && (opcode != (S2) OC_POST)); } return 0 ; } /* *************************************************************** */ // FUNCTION read_one_byte /* Read the byte that the file pointer is currently pointing at. Place this byte in one_byte. The file pointer ends up pointing at the next byte. What happens if there is no 'next byte'? Return 0 if no error, -1 otherwise. */ int read_one_byte(S2 * one_byte, FILE * dvi_file) { *one_byte = (S2) getc(dvi_file) ; /* getc instead of fgetc */ /* Let's live dangerously. if (feof(dvi_file)) { return -1 ; } *one_byte = ret_value ; */ return 0 ; } /* Read two bytes. */ int read_two_bytes(S2 * two_bytes, FILE * dvi_file) { int t0, t1 ; t1 = fgetc(dvi_file) ; if (feof(dvi_file)) { printf("[read_two_bytes]: EOF on dvi file encountered (byte 0).\n") ; exit (-1) ; } t0 = fgetc(dvi_file) ; if (feof(dvi_file)) { printf("[read_two_bytes]: EOF on dvi file encountered (byte 1).\n") ; exit (-1) ; } *two_bytes = ((U2)t1 << 8) + (U2)t0 ; return 0 ; } /* Read three bytes. */ int read_three_bytes(S4 * three_bytes, FILE * dvi_file) { unsigned int t0, t1, t2 ; t2 = fgetc(dvi_file) ; if (feof(dvi_file)) { printf("[read_three_bytes]: EOF on dvi file encountered (byte 2).\n") ; exit (-1) ; } t1 = fgetc(dvi_file) ; if (feof(dvi_file)) { printf("[read_three_bytes]: EOF on dvi file encountered (byte 1).\n") ; exit (-1) ; } t0 = fgetc(dvi_file) ; if (feof(dvi_file)) { printf("[read_three_bytes]: EOF on dvi file encountered (byte 0).\n") ; exit (-1) ; } *three_bytes = (S4) (((U4)t2 << 16) + ((U4)t1 << 8) + (U4)t0) ; return 0 ; } /* Read four bytes. */ int read_four_bytes(S4 * four_bytes, FILE * dvi_file) { unsigned int t0, t1, t2, t3 ; t3 = fgetc(dvi_file) ; if (feof(dvi_file)) { printf("[read_four_bytes]: EOF on dvi file encountered (byte 3).\n") ; exit (-1) ; } t2 = fgetc(dvi_file) ; if (feof(dvi_file)) { printf("[read_four_bytes]: EOF on dvi file encountered (byte 2).\n") ; exit (-1) ; } t1 = fgetc(dvi_file) ; if (feof(dvi_file)) { printf("[read_four_bytes]: EOF on dvi file encountered (byte 1).\n") ; exit (-1) ; } t0 = fgetc(dvi_file) ; if (feof(dvi_file)) { printf("[read_four_bytes]: EOF on dvi file encountered (byte 0).\n") ; exit (-1) ; } *four_bytes = (S4) (((U4)t3 << 24)+((U4)t2 << 16)+((U4)t1 << 8)+(U4)t0) ; return 0 ; } /* *************************************************************** */ // FUNCTION postamble_offset /* Get postamble offset, that is, the file position where the OC_POST opcode occurs. Returns the offset. */ S4 postamble_offset(FILE * dvifile) { S2 one_byte ; S4 q ; /* Go to the last byte of the file. */ fseek (dvifile, -1, SEEK_END) ; /* Back up past the 223's. */ do { (void) read_one_byte(&one_byte, dvifile) ; /* Backup 2 spaces. */ fseek (dvifile, -2, SEEK_CUR) ; } while (one_byte == 223) ; /* At this point, we should have read a '2', and the file pointer is pointing at the last byte of postamble offset. So we back up 3 more spaces to get to the first byte of q. */ /* Back up 3 more bytes to get to the postamble pointer. */ fseek (dvifile, -3L, SEEK_CUR) ; /* Get postamble offset by shifting. */ (void) read_four_bytes(&q, dvifile) ; return q ; } /* *************************************************************** */ // FUNCTION parse_pages /* Traverse file parsing pages. The definition of the dvi file format specifies that each BOP (beginning of page) opcode point to the _previous_ BOP (allowing the dvi file to be processed in reverse page order for output devices that output the page first). As such, the fastest way to find page information in forward order is to traverse the file in reverse, storing the page information in a dynamically allocated array. This array must be allocated _before_ parse_pages is called. Because the number of pages is specified in the postamble, the number of pages can be found without parsing the entire file. pages an array of type pageinfo for storing page information If pages is NULL, the function does not attempt to store the information (uesful for finding the first BOP). RETURNS: the byte offset (relative to the start of the file) of the _first_ BOP. */ S4 parse_pages(FILE * dvifile, pageinfo * pages, int num_pages) { S4 ppointer, last_ppointer, temp ; S4 q ; S4 counts[10] ; U4 current_physical_page ; S2 opcode ; int i ; //#ifdef SIXTY_FOUR_BITS // char *fmt = "p:[%d]: <%d/%d>\n"; //#else // char *fmt = "p:[%d]: <%d/%ld>\n"; //#endif /* Start over. */ rewind(dvifile) ; /* Get to the postamble. */ q = postamble_offset(dvifile) ; fseek (dvifile, q, SEEK_SET) ; /* Get the pointer to the last page (bop). */ fseek (dvifile, 1, SEEK_CUR) ; (void) read_four_bytes(&ppointer, dvifile) ; /* We travel through the file in REVERSE. Thus, we need to save the page information so that it can be printed in correct order. */ /* Loop as long as there is a page to go to. */ current_physical_page = num_pages ; while (ppointer != -1) { --current_physical_page ; /* Back up to previous bop. */ fseek(dvifile, ppointer, SEEK_SET) ; /* Save the offset for this page so later we can go back to it easily, but only if the array pages is not NULL. */ if (pages != (pageinfo *) NULL) { pages[current_physical_page].offset = ppointer ; } /* Read op code and verify that it is OC_BOP. */ read_one_byte(&opcode, dvifile) ; if (opcode != OC_BOP) { printf("bop code expected but not found\n") ; } /* Read the next 10 4-byte values for \count0 through \count9. */ for (i=0; i<10; ++i) { read_four_bytes(&temp, dvifile) ; counts[i] = temp ; } if (pages != (pageinfo *) NULL) { pages[current_physical_page].TeX_page = counts[0] ; } /* Save the last poitner and read the next pointer. */ last_ppointer = ppointer ; read_four_bytes(&ppointer, dvifile) ; } // if (verbose) // { // for (i=0; i= OC_XXX1) && (opcode <= OC_XXX4)) ++number_of_specials ; } while ((opcode >= (S2) 0) && (opcode != (S2) OC_POST)); return number_of_specials ; } /* *************************************************************** */ // FUNCTION print_page_list void print_page_list(pageinfo * pagelist, FILE * dvifile, int do_checksum, int md_flags) { int i, j, skip_chars ; int current_font ; FONTLINK cur_FONTLINK ; char checksum[33] = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX" ; S2 opcode ; S2 one_byte ; S2 two_bytes ; S4 three_bytes ; S4 four_bytes ; //md_flag message_digest_flag = MD_IGFONTS ; if (do_checksum) { printf("[message digest: simple sum") ; if (md_flags > 0) { printf(" (ignoring") ; if (md_flags & MD_FONTS) { printf(" fonts") ; } if (md_flags & MD_NOPS) { printf(" nops") ; } if (md_flags & MD_SPECIALS) { printf(" specials") ; } if (md_flags & MD_STACK) { printf(" stack") ; } printf(")]\n") ; } else { printf("]\n") ; } for (i=0; i used = 0 ; cur_FONTLINK = cur_FONTLINK -> next ; } } // Set skip_chars to 0 skip_chars = 0 ; // Goto Keep_going goto End_of_Page ; } // b0. zero-byte font commands. if ((opcode >= OC_FNTN0) && (opcode <= OC_FNTN127)) { current_font = opcode - 171 ; //printf ("current_font is %d\n", current_font) ; skip_chars = 0 ; // Has this font already been used? if (font_hash(current_font) -> used) { skip_chars = 1 ; } goto Keep_going ; } // b1. one-byte font commands. if (opcode == OC_FNT1) { // Backup one byte and read the byte. fseek (dvifile, -1, SEEK_CUR) ; read_one_byte (&one_byte, dvifile) ; current_font = (int) one_byte ; skip_chars = 0 ; // Has this font already been used? if (font_hash(current_font) -> used) { skip_chars = 1 ; } goto Keep_going ; } // b2. two-byte font commands. if (opcode == OC_FNT2) { // Backup two bytes and read the bytes. fseek (dvifile, -2, SEEK_CUR) ; read_two_bytes (&two_bytes, dvifile) ; current_font = two_bytes ; skip_chars = 0 ; // Has this font already been used? if (font_hash(current_font) -> used) { skip_chars = 1 ; } goto Keep_going ; } // b3. three-byte font commands. if (opcode == OC_FNT3) { printf("Cannot handle 3-byte fonts. Exiting.\n") ; exit (-1) ; // Backup three bytes and read the bytes. fseek (dvifile, -3, SEEK_CUR) ; read_three_bytes (&three_bytes, dvifile) ; current_font = three_bytes ; skip_chars = 0 ; // Has this font already been used? if (font_hash(current_font) -> used) { skip_chars = 1 ; } goto Keep_going ; } // b4. four-byte font commands. if (opcode == OC_FNT4) { printf("Cannot handle 4-byte fonts. Exiting.\n") ; exit (-1) ; // Backup four bytes and read the bytes. fseek (dvifile, -4, SEEK_CUR) ; read_four_bytes (&four_bytes, dvifile) ; current_font = four_bytes ; skip_chars = 0 ; // Has this font already been used? if (font_hash(current_font) -> used) { skip_chars = 1 ; } goto Keep_going ; } // c. if we are skipping characters, go back to start. if (skip_chars) { goto Keep_going ; } // d. We are not skipping chars. Is this a char? if ((opcode >= OC_SC0) && (opcode <= OC_SC131)) { // Set the used field for this font. font_hash(current_font) -> used = 1 ; // Start skipping chars. skip_chars = 1 ; goto Keep_going ; } // e. Anything else we go back to the top. goto Keep_going ; End_of_Page: printf (" end of font list\n") ; } // end of if (opt_SHOWFONTSONEACHPAGE) } // end of for loop } // end of else return ; } void print_used_fonts () { int i ; FONTLINK cur_FONTLINK ; for (i=0; i used) { printf (" Font [%d/%s/%d]\n", cur_FONTLINK -> font_id, cur_FONTLINK -> name, cur_FONTLINK -> scale) ; } cur_FONTLINK = cur_FONTLINK -> next ; } } } /* *************************************************************** */ /* Used for debugging. */ #ifndef NDEBUG void F_TELL(FILE * f) { printf(" >> File position is %ld\n", ftell(f) ) ; fflush(stdout) ; return ; } #endif /* *************************************************************** */ // FUNCTION read_next_opcode /* Parse the opcode that is currently being pointed at by the file pointer. (If the file pointer is not pointing at an opcode, unpredictable behavior may result.) When finished parsing the opcode, the file pointer ends up pointing at the next opcode; if this is the last opcode, the file pointer points at ??. Returns the opcode pointed at when procedure entered. Returns -1 if EOF encountered. show_specials: 1 print out specials on screen 0 do not print out specials on screen track_pages: 1 update global CurrentPhysicalPage 0 do not change CurrentPhysicalPage If BOP encountered, increments CurrentPhysicalPage and sets TeXPage equal to \count0. */ S2 read_next_opcode(FILE * dvifile, int show_specials, int track_pages) { S2 opcode ; S4 four_bytes ; int skip_bytes, ret_value ; char comment_string[256] ; post_info p ; /* Get the opcode. */ ret_value = read_one_byte(&opcode, dvifile) ; //printf("Read %o at file position %ld.\n", opcode, ftell(dvifile)); if (ret_value == -1) return -1 ; /* If opcode is BOP we want to increment the physical page counter and set the TeX page. */ if (opcode == (S2) OC_BOP) { if (track_pages) ++CurrentPhysicalPage ; ret_value = read_four_bytes(&four_bytes, dvifile) ; CurrentTeXPage = four_bytes ; fseek(dvifile, -4, SEEK_CUR) ; } /* Switch on the number of bytes the opcodes takes. */ skip_bytes = param_length3(opcode) ; switch (skip_bytes) { case -2: printf("ERROR. Invalid opcode (%d) found at file offset (%ld).\n", opcode, ftell(dvifile)) ; exit (0) ; break ; case -1: switch (opcode) { case OC_XXX1: case OC_XXX2: case OC_XXX3: case OC_XXX4: if (show_specials) { if (opt_SHOWPHYSICALPAGES) { printf("s:[%d/%d]:: ", CurrentPhysicalPage, (int) CurrentTeXPage) ; } else { printf("s:[XX/%d]:: ", (int) CurrentTeXPage) ; } if ((S4) read_special_opcode(dvifile, opcode, show_specials) == -1) { printf("ERROR reading a special opcode." ABORT_MSG "\n") ; exit (0) ; } else { printf("\n") ; } } else { (void) read_special_opcode(dvifile, opcode, show_specials) ; } break ; case OC_FD1: case OC_FD2: case OC_FD3: case OC_FD4: (void) read_fontdef_opcode(dvifile, opcode) ; break ; case OC_PRE: (void) read_pre_opcode(dvifile, comment_string) ; break ; case OC_POST: /* Back up one byte (read_post_opcode expects the file pointer to be pointing at OC_POST). */ backspace(dvifile) ; (void) read_post_opcode(dvifile, &p, FALSE) ; break ; default: printf("Inappropriate opcode found.\n") ; exit(0) ; break ; } break ; default: if (skip_bytes > 0) /* No reason to skip 0 bytes! (Thanks Heiko). */ { fseek(dvifile, (long) skip_bytes, SEEK_CUR) ; } else { /* We are skipping zero bytes. */ if (opt_CURFONT) { if ((opcode >= OC_FNTN0) && (opcode <= OC_FNTN127)) { CurrentFontNumber = opcode - OC_FNTN0 ; if (verbose) { printf("Switching font to font number %d\n", opcode) ; } } } if (SHOW_CONTROLCHARS) { if( (opcode<= CHAR_CCEND) || (opcode== CHAR_CCXX1) ) { printf("Control code %d found in font %d on page [%d/%d]\n", opcode, (int) CurrentFontNumber, (int) CurrentPhysicalPage, (int) CurrentTeXPage) ; } } } return opcode ; } return opcode ; } /* *************************************************************** */ // FUNCTION read_special_opcode /* dvifile should be pointing at the first byte immediately _following_ an opcode of type special (i.e., one of OC_XXX1, OC_XXX2, OC_XXX3, or OC_XXX4); if not, then unpredictable results will occur. If show_specials is 0 this function merely moves the file pointer to the ahead immediately following the special text. If show_specials is 1 this function parses the special and stores the contents in the character array special_text passed as the second parameter. NOTES: * The limit Special_Text_Length will not be exceded when allocating memory for the special text. * The procedure returns the length of the special text (or Special_Text_Length, whichever is the smaller), or -1 if an error is encountered. * The file pointer points at the byte immediately following the special text (i.e., the next op code). */ U4 read_special_opcode(FILE * dvifile, int opcode, int show_specials) { S2 one_byte ; S2 two_bytes ; S4 four_bytes ; char * special_text ; int text_length ; long text_rest; /* Make sure opcode is of the right type. */ if ((opcodeOC_XXX4)) return -1 ; four_bytes = (S4) 0 ; /* Read the next 1, 2, 3, or 4 bytes to get the length of the special text. */ switch (opcode) { case OC_XXX1: read_one_byte(&one_byte, dvifile) ; four_bytes = (S4) one_byte ; break ; case OC_XXX2: read_two_bytes(&two_bytes, dvifile) ; four_bytes = (S4) two_bytes ; break ; case OC_XXX3: read_three_bytes(&four_bytes, dvifile) ; break ; case OC_XXX4: read_four_bytes(&four_bytes, dvifile) ; break ; } /* We read the smaller of four_bytes and Special_Text_Length bytes. */ if (Special_Text_Length < 0) { text_length = four_bytes ; } else { text_length = S4_min((S4) Special_Text_Length, four_bytes) ; } // text_length = number of bytes to read from the special text. /* Read the text only if show_specials is set. */ if (show_specials) { /* Allocate memory for text string. Don't forget to free!! */ special_text = (char *) calloc((size_t) (text_length + 1), (size_t) sizeof(char)) ; if (special_text == (char *) NULL) { printf("Out of memory: special text is too long" ABORT_MSG "\n") ; exit (-1) ; } /* Read the data. */ fread(special_text, (size_t) sizeof(char), (size_t) text_length, dvifile) ; special_text[text_length] = '\0' ; /* If opt_REMOVECC is set, clean the text. */ if (opt_REMOVECC) { clean_chars(special_text, text_length) ; } /* If the special text was longer than text_length, skip the rest of the special text. (Fix supplied by Heiko Oberdiek.) */ text_rest = (long) (four_bytes - (S4) text_length); if (text_rest > 0) { fseek(dvifile, text_rest, SEEK_CUR); } printf("%s", special_text) ; free(special_text) ; } else { /* Just skip the text. */ fseek(dvifile, four_bytes, SEEK_CUR) ; } return four_bytes ; } /* *************************************************************** */ // FUNCTION clean_chars /* 'Clean' an array of characters, that is, change any not in the range 32 -- 126 to the character CC_MARKER (defined below). Returns the number of characters that had to be cleaned. */ #define CC_MARKER '.' int clean_chars(char * array_of_chars, int length) { int number_cleaned = 0 ; int i = 0 ; for (i = 0; i < length; ++i) { if ( ((unsigned int) array_of_chars[i] > 126) || ((unsigned int) array_of_chars[i] < 32) ) { array_of_chars[i] = CC_MARKER ; ++ number_cleaned ; } } return number_cleaned ; } /* *************************************************************** */ /* Return the number of bytes 'opcode' takes as parameter. If the number of parameters is variable (e.g., OC_XXX1), return -1. If opcode is not found in list, return -2 (i.e., error). */ int opcode_length[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //0-15 (0x-17x) 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //16-31 (20x-37x) 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //32-47 (40x-57x) 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //48-63 (60x-77x) 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //64-79 (100x-117x) 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //80-95 (120x-137x) 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //96-111 (140x-157x) 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //112-127 (160x-177x) 1, 2, 3, 4, 8, 1, 2, 3, 4, 8, 0, 44, 0, 0, 0, 1, //128-143 (200x-217x) 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 1, 2, 3, //144-159 (220x-237x) 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 0, 0, 0, 0, //160-175 (240x-257x) 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //176-191 (260x-277x) 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //192-207 (300x-317x) 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //208-223 (320x-337x) 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, -1 , //224-239 (340x-357x) -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -2, -2, -2, -2, -2, -2} ; //240-255 (360x-377x) /* Simply return opcode_length[opcode]. (Why the '3'? It's the third version. */ inline int param_length3(S2 opcode) { if ((opcode < 0) || (opcode > 255)) return -2 ; return opcode_length[opcode] ; } /* The file pointer should be pointing at an opcode. Return the number of bytes between this current opcode and the next opcode (not counting either of the opcodes). Thus, the return value could be zero. FILE POINTER: stays at current position. */ int arg_length(FILE * dvifile) { int current_file_position ; int opcode, p_length ; int return_value ; U2 one_byte ; U2 two_bytes ; U4 four_bytes ; U2 a, l ; /* Store current position. */ current_file_position = ftell(dvifile) ; /* Read the current opcode */ (void) read_one_byte(&one_byte, dvifile) ; opcode = (unsigned int) one_byte ; /* Get the opcode length. */ p_length = param_length3(opcode) ; /* If p_length is not negative, then simply return p_length. */ if (p_length >= 0) { return_value = p_length ; } else { /* The alternative is that we have a variable length argument. In this case, we have to read some date to get the argument length. */ /* Do the specials. */ switch (opcode) { case OC_XXX1: (void) read_one_byte(&one_byte, dvifile) ; return_value = (one_byte + 1) ; break ; case OC_XXX2: (void) read_two_bytes(&two_bytes, dvifile) ; return_value = (two_bytes + 2) ; break ; case OC_XXX3: (void) read_three_bytes(&four_bytes, dvifile) ; return_value = (four_bytes + 3) ; break ; case OC_XXX4: (void) read_four_bytes(&four_bytes, dvifile) ; return_value = (four_bytes + 4) ; break ; case OC_FD1: // Skip ahead 13 bytes. fseek(dvifile, 13, SEEK_CUR) ; // Read a (void) read_one_byte(&a, dvifile) ; // Read l (void) read_one_byte(&l, dvifile) ; return_value = 15 + a + l ; break ; case OC_FD2: // Skip ahead 14 bytes. fseek(dvifile, 14, SEEK_CUR) ; // Read a (void) read_one_byte(&a, dvifile) ; // Read l (void) read_one_byte(&l, dvifile) ; return_value = 16 + a + l ; break ; case OC_FD3: // Skip ahead 15 bytes. fseek(dvifile, 15, SEEK_CUR) ; // Read a (void) read_one_byte(&a, dvifile) ; // Read l (void) read_one_byte(&l, dvifile) ; return_value = 17 + a + l ; break ; case OC_FD4: // Skip ahead 16 bytes. fseek(dvifile, 16, SEEK_CUR) ; // Read a (void) read_one_byte(&a, dvifile) ; // Read l (void) read_one_byte(&l, dvifile) ; return_value = (18 + a + l) ; break ; } } // End of else // Go back to where we started. fseek (dvifile, current_file_position, SEEK_SET) ; return return_value ; } /* *************************************************************** */ /* Parses a fontdef opcode. Assumes that the file pointer is pointing at the byte immediately _following_ the opcode. */ int read_fontdef_opcode(FILE * dvifile, int opcode) { S2 one_byte ; S2 two_bytes ; S4 four_bytes ; int a, l, n ; if ((opcodeOC_FD4)) return -1 ; /* Read the next 1, 2, 3, or 4 bytes to get the length of the font number. */ switch (opcode) { case OC_FD1: read_one_byte(&one_byte, dvifile) ; four_bytes = (S4) one_byte ; break ; case OC_FD2: read_two_bytes(&two_bytes, dvifile) ; four_bytes = (S4) two_bytes ; break ; case OC_FD3: read_three_bytes(&four_bytes, dvifile) ; break ; case OC_FD4: read_four_bytes(&four_bytes, dvifile) ; break ; } /* Skip ahead 12 bytes to get to 'a'. */ fseek(dvifile, (size_t) 12, SEEK_CUR) ; /* Read one byte. */ read_one_byte(&one_byte, dvifile) ; a = one_byte ; /* Read another byte. */ read_one_byte(&one_byte, dvifile) ; l = one_byte ; n = a + l ; /* Skip this font name. */ fseek (dvifile, n, SEEK_CUR) ; return n ; } /* *************************************************************** */ // FUNCTION read_pre_opcode /* Read the preamble and return the comment string. */ int read_pre_opcode(FILE * dvifile, char * comment_string) { S2 k ; /* Skip first 13 bytes to get to the comment string. */ fseek (dvifile, 14, SEEK_SET) ; /* Get k, the number of bytes of the comment string. */ (void) read_one_byte(&k, dvifile) ; /* Get the comment string. */ fread(comment_string, (size_t) 1, (size_t) k , dvifile) ; /* Set a null at the end of the comment string. */ comment_string[k] = '\0' ; return 0 ; } /* *************************************************************** */ void backspace(FILE * dvifile) { fseek (dvifile, -1, SEEK_CUR) ; return ; } /* *************************************************************** */ // FUNCTION read_post_opcode /* This function parse the postamble which extends from OC_POST to OC_POSTPOST (with a few more bytes after that). The dvifile file pointer should be pointing AT the post opcode. Here is what happens: 1. Verify that the byte currently pointed at is in fact OC_POST. 2. Find 2-byte page count. 3. Parse the font_defs counting up fonts. If show_fonts_flag is true, print out the font number/name/scale information. 4. The page and font count are stored in the structure pointed at by p. */ int read_post_opcode(FILE * dvifile, post_info * p, int show_fonts_flag) { S2 opcode ; S2 one_byte ; U2 page_count ; int ret_value, keep_going ; int number_of_fonts ; /* Read the opcode and verify that it is OC_POST. */ read_one_byte(&opcode, dvifile) ; if (opcode != OC_POST) { printf("opcode is not OC_POST.\n") ; exit (0) ; } /* Skip ahead 27 bytes to get page count. */ ret_value = fseek (dvifile, 26L, SEEK_CUR) ; if (ret_value == -1) { printf("fseek error\n") ; exit (-1) ; } /* Get 2-byte page count. */ (void) read_two_bytes(&one_byte, dvifile) ; page_count = (U2) one_byte ; p->page_count = page_count ; /* Parse the font defs. */ number_of_fonts = 0 ; keep_going = 1 ; while (keep_going) { ret_value = read_fnt_def_in_post(dvifile, show_fonts_flag) ; if (ret_value == -1) { printf ("font name read error.\n") ; keep_going = 0 ; } else { if (ret_value == OC_POSTPOST) { keep_going = 0 ; } else { ++number_of_fonts ; } } } p->number_of_fonts = number_of_fonts ; return 0 ; } /* *************************************************************** */ /* Parse the next font def in the postamble. If show_fonts_flag is TRUE, display font information on screen. RETURNS: ?? */ int read_fnt_def_in_post(FILE * dvi_file, int show_fonts_flag) { char font_string[256] ; S2 opcode ; S2 a, l; S2 two_bytes ; int scaled, font_number = -1 ; S4 four_bytes ; U4 scale_size, design_size, checksum ; /* Keep reading until a font is found, or an error is encountered. */ while (1) { /* Read the current byte and check that it is a font command. */ (void) read_one_byte(&opcode, dvi_file) ; switch (opcode) { case OC_NOP: break ; case OC_FD4: case OC_FD3: case OC_FD2: case OC_FD1: if (opcode==OC_FD4) { (void) read_four_bytes(&four_bytes, dvi_file) ; font_number = (int) four_bytes ; } if (opcode==OC_FD3) { (void) read_three_bytes(&four_bytes, dvi_file) ; font_number = (int) four_bytes ; } if (opcode==OC_FD2) { (void) read_two_bytes(&two_bytes, dvi_file) ; font_number = (int) two_bytes ; } if (opcode==OC_FD1) { (void) read_one_byte(&two_bytes, dvi_file) ; font_number = (int) two_bytes ; } /* Get 4-byte (checksum) to get to the font scale size. */ (void) read_four_bytes(&four_bytes, dvi_file) ; checksum = (U4) four_bytes ; (void) read_four_bytes(&four_bytes, dvi_file) ; scale_size = (U4) four_bytes ; /* Read design size. */ (void) read_four_bytes(&four_bytes, dvi_file) ; design_size = (U4) four_bytes ; (void) read_one_byte(&a, dvi_file) ; (void) read_one_byte(&l, dvi_file) ; /* Read the string of length a+l. */ fread(font_string, (size_t) 1, (size_t) a+l , dvi_file) ; /* Clip end. */ font_string[a+l] = '\0' ; if (show_fonts_flag) { scaled = (int) (0.5 + 1000.0 * ((double) scale_size)/((double) design_size)) ; printf("f:[%d/%s/%d]::%08lx", font_number, font_string, scaled, (unsigned long int) checksum) ; printf("\n") ; if (opt_SHOWFONTSONEACHPAGE) { add_font_hash (font_number, font_string, scaled) ; } } return opcode ; break ; case OC_POSTPOST: return opcode ; default: return -1 ; /* Anything else is an error. */ } } } /* *************************************************************** */ S4 S4_min(S4 a, S4 b) { if (a 0) { (void) read_one_byte(&one_byte, dvifile) ; opcode = (unsigned int) one_byte ; opcode_length = param_length3(opcode) ; skip_this_opcode = 0 ; if (flags & MD_FONTS) { skip_this_opcode = ( ((opcode >= OC_FNTN0) && (opcode <= OC_FNT4)) || ((opcode >= OC_FD1) && (opcode <= OC_FD4)) ) ; } if (flags & MD_NOPS) { skip_this_opcode = (skip_this_opcode || (opcode == OC_NOP)) ; } if (flags & MD_SPECIALS) { skip_this_opcode = (skip_this_opcode || ((opcode >= OC_XXX1) && (opcode <= OC_XXX4))) ; } if (flags & MD_STACK) { skip_this_opcode = (skip_this_opcode || ((opcode >= OC_PUSH) && (opcode <= OC_POP))) ; } if (skip_this_opcode) { if (opcode_length != 0) { backspace(dvifile) ; length = arg_length(dvifile) ; fseek(dvifile, length + 1, SEEK_CUR) ; bytes_left_on_page = bytes_left_on_page - length - 1 ; } else { --bytes_left_on_page ; } } else { switch(opcode_length) { // This case is handled differently than the others. case 0: bytes_left = 1 ; break ; case -1: // Get argument length. backspace (dvifile) ; length = arg_length(dvifile) ; ++length ; // length = number of bytes including opcode and argument. /* Read the argument and store. If length <= ARGBUFLEN, store in arg_buf; otherwise, use a malloc. */ if (length <= ARGBUFLEN) { (void) fread(arg_buf, (size_t) 1, (size_t) length, dvifile) ; buffer = arg_buf ; } else { buffer_start = calloc((size_t) (length + 1), (size_t) sizeof(char)) ; if (buffer_start == (char *) NULL) { printf("Out of memory: text is too long" ABORT_MSG "\n") ; exit (-1) ; } buffer = buffer_start ; calloc_flag = TRUE ; (void) fread(buffer, (size_t) sizeof(char), (size_t) 6, dvifile) ; } bytes_left = length ; break ; case -2: printf("Error reading opcode. Aborting.\n") ; exit (-1) ; break ; default: backspace (dvifile) ; bytes_left = opcode_length + 1 ; length = bytes_left ; (void) fread(arg_buf, 1, bytes_left, dvifile) ; buffer = arg_buf ; break ; } // End switch /* At this stage, bytes_length bytes of the opcode argument is stored in buffer (except if we are dealing with a one-byte opcode). Note also that the dvifile file pointer is pointing at the _next_ opcode. */ /* If opcode is a font number assignment or definition, skip it. */ while (bytes_left > 0) { /* If we are in the special case where opcode_length is 0 (meaning we are dealing with an opcode with no argument), we store the result and move on. */ if (opcode_length == 0) { sixteen_bytes[current_byte] = (int) opcode ; ++current_byte ; --bytes_left ; --bytes_remaining ; --bytes_left_on_page ; } else { /* We are dealing with an opcode which takes an argument. So, we need to store the opcode and its argument in sixteen bytes. The opcode and its arguments are in an array pointed to by buffer. Of course, the data in buffer may be longer than the number of spaces available in sixteen_char; so, we store the minimum of bytes_remaining and bytes_left. */ number_bytes_that_fit = int_min(bytes_remaining, bytes_left) ; for (i=0; i= 16) || (bytes_left_on_page == 0)) { /* for (i=0; i<=15; ++i) { printf("sixteen_bytes[%d] is %x\n", i, sixteen_bytes[i]) ; } */ b[0] = (sixteen_bytes[0] << 24) + (sixteen_bytes[1] << 16) + (sixteen_bytes[2] << 8) + (sixteen_bytes[3]) ; b[1] = (sixteen_bytes[4] << 24) + (sixteen_bytes[5] << 16) + (sixteen_bytes[6] << 8) + (sixteen_bytes[7]) ; b[2] = (sixteen_bytes[8] << 24) + (sixteen_bytes[9] << 16) + (sixteen_bytes[10] << 8) + (sixteen_bytes[11]) ; b[3] = (sixteen_bytes[12] << 24) + (sixteen_bytes[13] << 16) + (sixteen_bytes[14] << 8) + (sixteen_bytes[15]) ; total[0] = (U4) (total[0] + b[0]) ; total[1] = (U4) (total[1] + b[1]) ; total[2] = (U4) (total[2] + b[2]) ; total[3] = (U4) (total[3] + b[3]) ; // Zero sixteen_bytes; sixteen_bytes[0] = 0 ; sixteen_bytes[1] = 0 ; sixteen_bytes[2] = 0 ; sixteen_bytes[3] = 0 ; sixteen_bytes[4] = 0 ; sixteen_bytes[5] = 0 ; sixteen_bytes[6] = 0 ; sixteen_bytes[7] = 0 ; sixteen_bytes[8] = 0 ; sixteen_bytes[9] = 0 ; sixteen_bytes[10] = 0 ; sixteen_bytes[11] = 0 ; sixteen_bytes[12] = 0 ; sixteen_bytes[13] = 0 ; sixteen_bytes[14] = 0 ; sixteen_bytes[15] = 0 ; current_byte = 0 ; bytes_remaining = 16 ; } // end of if } // End of while (bytes_left > 0) if (calloc_flag) { free(buffer_start) ; calloc_flag = FALSE ; } } } hexify(total[0], eightchars) ; strncpy(checksum, eightchars, 8) ; hexify(total[1], eightchars) ; strncpy(checksum + 8, eightchars, 8) ; hexify(total[2], eightchars) ; strncpy(checksum + 16, eightchars, 8) ; hexify(total[3], eightchars) ; strncpy(checksum + 24, eightchars, 8) ; return ; } /* *************************************************************** */ // FUNCTION hexify /* Take a four-byte unsigned integer and return its 8-character hex string equivalent. */ static char hex_digit[16] = "0123456789ABCDEF" ; void hexify(U4 x, char * eightchars) { eightchars[0] = hex_digit[(U2) ((x & 0xF0000000) >> 28) ] ; eightchars[1] = hex_digit[(U2) ((x & 0x0F000000) >> 24) ] ; eightchars[2] = hex_digit[(U2) ((x & 0x00F00000) >> 20) ] ; eightchars[3] = hex_digit[(U2) ((x & 0x000F0000) >> 16) ] ; eightchars[4] = hex_digit[(U2) ((x & 0x0000F000) >> 12) ] ; eightchars[5] = hex_digit[(U2) ((x & 0x00000F00) >> 8) ] ; eightchars[6] = hex_digit[(U2) ((x & 0x000000F0) >> 4) ] ; eightchars[7] = hex_digit[(U2) ((x & 0x0000000F) >> 0) ] ; } /* *************************************************************** */ /* Timer code. */ #ifdef ENABLE_TIMING #define MAXSTRING 100 typedef struct { clock_t begin_clock, save_clock ; time_t begin_time, save_time ; } time_keeper ; static time_keeper tk ; void start_time(void) { tk.begin_clock = tk.save_clock = clock() ; tk.begin_time = tk.save_time = time(NULL) ; } double prn_time(void) { char s1[MAXSTRING], s2[MAXSTRING] ; int field_width, n1, n2 ; double clocks_per_second = (double) CLOCKS_PER_SEC, user_time, real_time ; user_time = (clock() - tk.save_clock) / (clocks_per_second) ; real_time = difftime(time(NULL), tk.save_time) ; tk.save_clock = clock() ; tk.save_time = time(NULL) ; n2 = sprintf(s1, "%.1f", user_time) ; n1 = sprintf(s2, "%.1f", real_time) ; field_width = (n1 > n2) ? n1 : n2 ; printf("%s%*.1f%s\n%s%*.1f%s\n\n", "User time: ", field_width, user_time, " seconds", "Real time: ", field_width, real_time, " seconds"); return user_time ; } #endif /* *************************************************************** */ // FUNCTION add_font_hash /* The add_font_hash function takes a font id and adds it to the font hash table. The font hash table is an array of length a power of 2. Because the font ids typically come in consecutive order starting around 80 or so, we use a hash table size the power of 2 closest to N. We handle collisions by linking entries with the same hash. We use the multiplication method. */ double A = 0.61803 ; // Golden ratio as suggested by Knuth. int add_font_hash (int key, char * font_string, int scale) { double kA ; int value ; FONTLINK cur_FONTLINK, new_FONTLINK ; // Compute hash function of key. kA = key * A ; value = floor ((double) FontHashTableSize * (kA - floor (kA))) ; //printf("The hash for %d is %d.\n", key, value) ; // Add to FontHashTable. If this entry is entry we are set. cur_FONTLINK = FontHashTable + value ; if (cur_FONTLINK -> font_id == -1) { // Store the value. cur_FONTLINK -> font_id = key ; // Copy the font string cur_FONTLINK -> name = malloc (256 * sizeof(char)) ; (void) strcpy (cur_FONTLINK -> name, font_string) ; // Copy the font scale cur_FONTLINK -> scale = scale ; } else { if (verbose) { printf("\n\nCOLLISION!!!!!!!!!!!!!\n\n") ; } ++NumberOfCollisions ; // This is a collision. So go follow links to find end. while (cur_FONTLINK -> next != (FONTLINK) NULL) { // Go to next link. cur_FONTLINK = cur_FONTLINK -> next ; } // OK. We have found the end. malloc another entry. new_FONTLINK = malloc(sizeof(FONTELEMENT)) ; cur_FONTLINK -> next = new_FONTLINK ; new_FONTLINK -> font_id = key ; // Copy the font string new_FONTLINK -> name = malloc (256 * sizeof(char)) ; (void) strcpy (new_FONTLINK -> name, font_string) ; // Copy the font scale new_FONTLINK -> scale = scale ; new_FONTLINK -> next = (FONTLINK) NULL ; } //print_font_hash_table () ; return 1 ; } void print_font_hash_table () { int i ; FONTLINK cur_FONTLINK ; for (i=0; i [%d/%s/%d] [used=%d]\n", cur_FONTLINK -> font_id, cur_FONTLINK -> name, cur_FONTLINK -> scale, cur_FONTLINK -> used ) ; cur_FONTLINK = cur_FONTLINK -> next ; } } } } /* *************************************************************** */ // FUNCTION font_hash (int font_id) /* The function font_hash takes as its argument the font_id and returns a pointer to the font element in the FontHashTable. */ FONTLINK font_hash (int key) { double kA ; int value ; FONTLINK cur_FONTLINK ; // Compute hash function of key. kA = key * A ; value = (int) floor ((double) FontHashTableSize * (kA - floor (kA))) ; cur_FONTLINK = FontHashTable + value ; // Find this key. while ( (cur_FONTLINK != (FONTLINK) NULL) && (cur_FONTLINK -> font_id != key) ) { cur_FONTLINK = cur_FONTLINK -> next ; } if (cur_FONTLINK == (FONTLINK) NULL) { printf ("[font_hash]: could not find key %d in FontHashTable\n", key); print_font_hash_table () ; printf (ABORT_MSG) ; exit (-1) ; } if (cur_FONTLINK -> font_id != key) { printf ("[font_hash]: could not find key %d in FontHashTable\n", key); exit (-1) ; } return cur_FONTLINK ; } /* *************************************************************** */ /* Find the largest power of 2 that is not less than n. Examples: closest_power_of_2(4) = 4 closest_power_of_2(9) = 16 We want this function to be fast and to not use the math library. */ unsigned int closest_power_of_2(unsigned int n) { unsigned int i, m ; i = 0 ; // One's complement of 0. m = n ; while (m) { // Shift bits right one place. // printf("%d ", m) ; m = m >> 1 ; ++i ; } if ((1<<(i-1))==n) { return (1 << --i) ; // In the case that n is a perfect power of 2. } else { return (unsigned) (1 << i) ; } } /* *************************************************************** */ /* Return the floor of x, that is, the largest integer not greater than x. */ double floor(double x) { double m ; m = (int) x ; if (m > x) { return (m - (double) 1.0) ; } else { return (m) ; } } /* *************************************************************** */ /* HISTORY: 02 May 2002: version 0.44. Added another ignore option to the checksum code: MD_STACK to ignore push'es and pop's. 23 April 2002: version 0.43. Added -P to suppress the display of physical pages. 30 December 2001: version 0.42. Added -d option to dump opcodes. 25 March 2001: version 0.41. Replaced the log function and floor with internal functions (so math.h is no longer needed). 21 March 2001: version 0.40a. Free'd the FontHashTable memory when printing page fonts. 25 February 2001: version 0.40. First pass at listing fonts on a given page. Note that we added math.h as an #include. 25 January 2001: version 0.32. Padded checksum output (otherwise could get fewer than 8 hex characters). 26 April 2000: version 0.31. Fixed BAD bug in opcode parameter length array. WOW! 01 February 2000: version 0.30. Changed message digest code so that -M takes a numeric parameter to indicate which opcodes to ignore. -M0 is equivalent to -m, -M1 ignores fonts, -M2 ignores NOPS, -M4 ignores specials (can add numbers to combine ignores). 29 January 2000: version 0.27b. Fixed small bug that gave incorrect error message when file not found. 18 December 1999: version 0.27. Updated number for distribution. 09 November 1999: version 0.25beta. Revised message_digest to add option of ignoring all font assignments and definitions. 09 November 1999: beta version Added -S to count number of specials for summary. Fixed bug which would display specials twice in some circumstances. First version of message digest option: simple sum version of message digest. Added font checksum. 20 October 1999: version 0.22b/c. If special text length option is negative, read entire special text. Change how fonts are displayed using SHOW_FONTS flag. Make a tiny bit faster by not allocating memory for pageinfo unless opt_PAGES is actually TRUE. 15 October 1999: version 0.22a. First attempt at making variable-length special text display. Changed CC_MARKER to '.' at Heiko's request. 14 October 1999: version 0.22. Heiko Oberdiek suggested an option (currently in testing) to suppress control characters in special text from appearing on the screen (these control characters can screw up some screens). Heiko (again!) pointed out that if the special text is too long, then the excess has to be skipped, or else the parsing will parse the special text as if it was opcodes; Heiko also supplied a simple fix for this problem. Wrote param_length3 to replace param_length; should be faster. Added some conditional compilation code in case someone has a strange stdio.h. Initialized some variables. Added ENABLE_TIMING for conditionally compiling in timing code; this is in case someone does not have the correct code. 22 September 1999: version 0.21. Fixed bug that gave incorrect response when no filename was specified. Changed e-mail contact. 26 June 1999: version 0.20. -G option (beta) to find "bad" characters (0-31 & 127). Rewrote some of the option-handling code. Fixed bug in font counting. Added font number when displaying font information. 21 May 1999: worked on making it run on 64-bit machines ; also shortened read_one_byte to increase speed. 20 May 1999: added a missing "\n" in print_use_string 17 May 1999: version 0.1: added -C for more rigorous validity check. Made a few minor code changes. 07 May 1999: made a few more small code optimizations. 05 May 1999: made a few other small improvements and changes to help screen. 03 May 1999: made some code improvements and bug fixes as suggested by Heiko Oberdiek 20 April 1999: initial version 0.0 */ /* ACKNOWLEDGEMENTS: Heiko Oberdiek made significant suggestions on improving the performance of the code, and pointed out several errors. Tom Kacvinsky helped make the code work on 64-bit machines. Karsten Tinnefeld made some helpful suggestions on getting the code to compile on older suns and contributed some linux and Sun binaries. */ /* PROFILING: The function read_next_code takes the lion's share of the time. It repeatedly calls the two functions param_length and read_one_byte. Concentrate on these functions to increase performance. */ /* TO DO: Make work with nonstandard dvi files (etex, omega, etc)? Remove calloc from message_digest code; not really needed. ERROR code?? 0 = exit normally 1 = file not found 2 = file found but not a dvi file? */ /* IGNORE IGNORE IGNORE IGNORE IGNORE IGNORE IGNORE IGNORE */