/* build_index.c * Copyright (C) 2011, Antonio FariƱa, all rights reserved. * * build_index.c: Main program to build a word-based compressed data structure * defined as followed in "interface.h". * * This program is linked with the implementation of such data structure (wcsa.a) * and the building-program is created. * It loads the source text, creates the self-index and saves it to disk. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * */ #include #include #include #include "interface.h" /* only for getTime() */ #include #include /* macro to detect and notify errors */ #define IFERROR(error) {{if (error) { fprintf(stderr, "%s\n", error_index(error)); exit(1); }}} int read_file(char *filename, uchar **textt, ulong *length); void print_usage(char *); double getTime(void); int main(int argc, char *argv[]) { char *infile, *outfile; uchar *text; char *params = NULL; ulong text_len; void *index; int error, i; double start, end; if (argc < 3) print_usage(argv[0]); if (argc > 3) { int nchars, len; nchars = argc-3; for(i=2;i Output %lu bytes.\n", text_len, index_len); fprintf(stdout,"Overall compression --> %.2f%% (%.2f bits per char).\n\n", (100.0*index_len)/text_len, (index_len*8.0)/text_len); error = free_index(index); IFERROR(error); free(params); exit(0); } /* Opens and reads a text file */ int 
read_file(char *filename, uchar **textt, ulong *length)
{
    /*
     * Loads the complete contents of `filename` into a newly malloc'ed
     * buffer.
     *
     *   filename  path of the file to read (opened in binary mode)
     *   textt     out: receives the buffer; the caller must free() it
     *   length    out: receives the number of bytes read
     *
     * Returns 0 on success and 1 on any failure (open, seek, tell,
     * allocation or short read).  On failure *textt and *length are left
     * untouched and no resource is leaked.
     */
    FILE *infile;
    uchar *text = NULL;
    long size;
    size_t nread;
    int status = 1;

    infile = fopen(filename, "rb");   /* "b": binary mode, needed on DOS/Windows */
    if (infile == NULL)
        return 1;

    /* Determine the file length by seeking to its end. */
    if (fseek(infile, 0, SEEK_END) != 0)
        goto done;
    size = ftell(infile);
    if (size < 0)                     /* ftell() reports failure as -1L */
        goto done;
    if (fseek(infile, 0, SEEK_SET) != 0)
        goto done;

    /*
     * malloc(0) is allowed to return NULL, which would be mistaken for an
     * out-of-memory condition on an empty file: always ask for >= 1 byte.
     */
    text = (uchar *) malloc(size > 0 ? (size_t) size * sizeof(*text) : 1);
    if (text == NULL)
        goto done;

    /* Read the text in one sweep; a short read is an error. */
    nread = fread(text, sizeof(*text), (size_t) size, infile);
    if (nread != (size_t) size)
        goto done;

    *length = (ulong) size;
    *textt = text;
    text = NULL;                      /* ownership handed to the caller */
    status = 0;

done:
    free(text);                       /* no-op on success (text is NULL) */
    fclose(infile);
    return status;
}

/*
 * Returns the CPU time, in seconds, consumed so far by the calling process:
 * the sum of the user and system times reported by getrusage(RUSAGE_SELF).
 */
double
getTime(void)
{
    struct rusage usage;
    double user_secs, sys_secs;

    getrusage(RUSAGE_SELF, &usage);

    user_secs = (double) usage.ru_utime.tv_sec
              + (double) usage.ru_utime.tv_usec / 1000000.0;
    sys_secs  = (double) usage.ru_stime.tv_sec
              + (double) usage.ru_stime.tv_usec / 1000000.0;

    return user_secs + sys_secs;
}

/*
 * Prints the command-line synopsis to stderr and terminates the program
 * with exit status 1.  Never returns.
 *
 *   progname  name shown in the synopsis (main passes argv[0])
 */
void
print_usage(char *progname)
{
    fprintf(stderr, "Usage: %s <source file> <index file> [<parameters>]\n", progname);
    fprintf(stderr, "\nIt builds the index for the text in file <source file>,\n");
    fprintf(stderr, "storing it in <index file>. Any additional <parameters> \n");
    fprintf(stderr, "will be passed to the construction function.\n");
    fprintf(stderr, "At the end, the program sends to the standard error \n");
    fprintf(stderr, "performance measures on time to build the index.\n\n");
    exit(1);
}