/* 
 * Compress - data compression program 
 */

#include "compress.h"

/* For SPEC95 use, PBITS and BITS automatically set to 16.
	Jeff Reilly, 1/15/95				*/
/* Number of slots in the hash table (htab) and the parallel code table
 * (codetab). */
#define HSIZE	69001		/* 95% occupancy */

/* Element type of the hash table; also used for the ratio-check
 * counters in compress(). */
typedef long int       count_int;

/* Hooray!  Solaris has include files with prototypes! */
/* #include "useful.h" */

/* Declaration only; this part of the file neither defines nor calls it. */
int spec_select_action(char* from_buf,
		       int from_count,
		       int action,
		       char* to_buf);

/* Forward declaration: marks every entry of a table of hsize elements as
 * empty.  Defined further down. */
static void cl_hash(count_int hsize,	/* reset code table */
                    count_int *htab);

/* Hash table of fcode keys.  A negative entry marks an unused slot. */
static count_int htab[HSIZE];
/* codetab[i] is the code assigned to the key stored in htab[i]. */
static unsigned short codetab[HSIZE];

/* Table size handed to the hashing helpers; fixed at HSIZE here. */
static const code_int hsize = HSIZE;	/* for dynamic table sizing */

/*****************************************************************
 * TAG( main )
 *
 * Algorithm from "A Technique for High Performance Data Compression",
 * Terry A. Welch, IEEE Computer Vol 17, No 6 (June 1984), pp 8-19.
 *
 * Usage: compress [-dfvc] [-b bits] [file ...]
 * Inputs:
 *	-d:	    If given, decompression is done instead.
 *
 *      -c:         Write output on stdout, don't remove original.
 *
 *      -b:         Parameter limits the max number of bits/code.
 *
 *	-f:	    Forces output file to be generated, even if one already
 *		    exists, and even if no space is saved by compressing.
 *		    If -f is not used, the user will be prompted if stdin is
 *		    a tty, otherwise, the output file will not be overwritten.
 *
 *      -v:	    Write compression statistics
 *
 * 	file ...:   Files to be compressed.  If none specified, stdin
 *		    is used.
 * Outputs:
 *	file.Z:	    Compressed form of file with same mode, owner, and utimes
 * 	or stdout   (if stdin used as input)
 *
 * Assumptions:
 *	When filenames are given, replaces with the compressed version
 *	(.Z suffix) only if the file decreases in size.
 * Algorithm:
 * 	Modified Lempel-Ziv method (LZW).  Basically finds common
 * substrings and replaces them with a variable size code.  This is
 * deterministic, and can be done on the fly.  Thus, the decompression
 * procedure needs no input table, but tracks the way the table was built.
 *
 *
 * Changed from main to spec_select_action,
 *	Jeff Reilly - 	1/15/95 SPEC
 */

/* compress (Originally: stdin to stdout -- Changed by SPEC to: memory
 * to memory)
 *
 * Algorithm: use open addressing double hashing (no chaining) on the
 * prefix code / next character combination.  We do a variant of
 * Knuth's algorithm D (vol. 3, sec. 6.4) along with G. Knott's
 * relatively-prime secondary probe.  Here, the modular division first
 * probe gives way to a faster exclusive-or manipulation.  Also do
 * block compression with an adaptive reset, whereby the code table is
 * cleared when the compression ratio decreases, but after the table
 * fills.  The variable-length output codes are re-sized at this
 * point, and a special CLEAR code is generated for the decompressor.
 * Late addition: construct the table according to file size for
 * noticeable speed improvement on small files.  Please direct
 * questions about this implementation to ames!jaw.  */

/*
 * calc_ratio - compression ratio as a fixed-point value.
 *
 *   in_count:   length of the input seen so far
 *   bytes_out:  length of the output produced so far (the divisor)
 *
 * Returns in_count / bytes_out with 8 fractional bits (i.e. scaled by
 * 256).  When in_count is too large to be shifted left by 8, bytes_out
 * is scaled down by 256 instead.  Whenever the divisor would be zero,
 * or the quotient would not fit the int return type, the result
 * saturates at 0x7fffffff instead of dividing by zero or being
 * truncated.
 */
static int
calc_ratio(long int in_count,
           long int bytes_out)
{
  const long int saturated = 0x7fffffff;
  long int ratio;

  if (in_count > 0x007fffff)
  {	/* shift will overflow */
    long int high_order = bytes_out >> 8;
    /* Don't divide by zero */
    ratio = ((high_order == 0) ? saturated : (in_count / high_order));
  }
  else if (bytes_out == 0)
  {	/* Don't divide by zero here either */
    ratio = saturated;
  }
  else
  {
    ratio = (in_count << 8) / bytes_out; /* 8 fractional bits */
  }

  /* The return type is int: keep the long quotient within range. */
  if (ratio > saturated)
  {
    ratio = saturated;
  }

  return (int) ratio;
}
/*
 * fcode_of - hash-table key for a (next byte, prefix code) pair.
 *
 * The byte is widened to long and shifted left by the bit count, then
 * the prefix code is added; the result is a long.  Kept as a macro; the
 * equivalent function signature would be
 *
 *   static inline long fcode_of(const int c,
 *                               const int maxbits,
 *                               const code_int ent);
 */
#define fcode_of(byte,nbits,prefix) \
  ((long)((prefix) + ((long)(byte) << (nbits))))

/*
 * hash_lookup - find the htab slot for a key.
 *
 *   c, ent:  next byte and prefix code; combined by shift-and-xor to
 *            form the first probe index
 *   fcode:   key being searched for
 *   hshift:  left shift applied to c when hashing
 *   hsize:   table size, used to derive the probe step and to wrap
 *
 * Returns the index of the slot whose entry equals fcode, or, if none
 * is found, the index of the first negative (unused) slot reached on
 * the probe sequence.  The caller tells the two apart by comparing
 * htab[index] with fcode.
 */
static /* inline */ int
hash_lookup(const int c,
	    const code_int ent,
	    const long fcode,
	    const int hshift,
	    const code_int hsize)
{
  code_int slot = ((c << hshift) ^ ent);	/* xor hashing */
  int step;

  /* secondary hash (after G. Knott): fixed stride derived from the
   * first probe position */
  if (slot == 0) {
    step = 1;
  }
  else {
    step = (hsize - slot);
  }

  for (;;) {
    if ( htab[slot] == fcode ) {
      return slot;		/* key found */
    }
    if ( (long)htab[slot] < 0 ) {
      return slot;		/* unused slot: key is absent */
    }

    /* occupied by a different key: step backwards, wrapping around */
    slot -= step;
    if ( slot < 0 ) {
      slot += hsize;
    }
  }
}

/*
 * calc_hshift - shift count used by the xor hash for a given table size.
 *
 * Starts from 8 and gives up one bit for every doubling needed to bring
 * hsize up to at least 65536; a table of 65536 slots or more yields 8.
 * hsize must be positive: a value of zero or less can never reach the
 * threshold by doubling.
 */
static
int
calc_hshift(long hsize)
{
  int  doublings = 0;
  long scaled = hsize;

  while (scaled < 65536L)
  {
    scaled *= 2L;
    doublings++;
  }

  return 8 - doublings;
}

/*
 * compress - LZW-compress everything readable from in_stream and emit
 * the resulting codes through out_stream.
 *
 *   maxbits:         bit count used to build hash keys and to bound the
 *                    code space via MAX_MAX_CODE(maxbits)
 *   block_compress:  when non-zero, the compression ratio is sampled
 *                    every CHECK_GAP input bytes once the code space is
 *                    full, and the tables are cleared if it stopped
 *                    improving
 *   in_stream:       source of bytes, read with getbyte() until EOF
 *   out_stream:      destination handed to output_initialize()
 *
 * NOTE(review): the first getbyte() result becomes the initial prefix
 * without an EOF check, and in_count starts at 1; behaviour on an empty
 * input depends on what output() does with that value - confirm.
 */
void
compress(const int maxbits,
         const int block_compress,
         stream_state *in_stream,
         stream_state *out_stream)
{
  int c;			/* byte just read */
  code_int ent;			/* code of the string matched so far */
  int hshift = calc_hshift(hsize);
  code_int free_ent = FIRST(block_compress);	/* next code to assign */
  long int in_count = 1;	/* length of input */
  long int ratio = 0;		/* last compression ratio */
  const count_int CHECK_GAP = 10000; /* ratio check interval */
  count_int checkpoint = CHECK_GAP; /* next time to check ratio */
  output_buf_t output_buf;

  output_initialize(&output_buf, maxbits, block_compress, out_stream);

  ent = getbyte (in_stream);

  cl_hash( (count_int) hsize, htab);		/* clear hash table */

  for (;;) {
    long fcode;
    code_int slot;
    long int current_ratio;

    c = getbyte(in_stream);
    if ( c == EOF ) {
      break;
    }

    fcode = fcode_of(c, maxbits, ent);
    slot = hash_lookup(c, ent, fcode, hshift, hsize);
    in_count++;

    /* Known string: extend the current match and read on. */
    if (htab[slot] == fcode) {
      ent = codetab[slot];
      continue;
    }

    /* Unknown string: emit the prefix and restart from this byte. */
    output( ent, &output_buf );
    ent = c;

    if ( free_ent < MAX_MAX_CODE(maxbits) ) {
      /* Room left in the code space: record the new string. */
      codetab[slot] = free_ent; /* code -> hashtable */
      htab[slot] = fcode;
      /* just assigned a new entry number, if it
       * is greater than the maxcode then we're going
       * to have to bump up the number of bits we're using.
       */
      output_check_bits(free_ent, &output_buf);
      free_ent++;
      continue;
    }

    /* Code space is full: only act at a checkpoint in block mode. */
    if ( ((count_int)in_count < checkpoint) || !block_compress ) {
      continue;
    }

    current_ratio = calc_ratio(in_count,
                               var_sym_output_length(&output_buf));
    checkpoint = in_count + CHECK_GAP;

    if (current_ratio > ratio) {
      ratio = current_ratio;	/* still improving: keep the table */
      continue;
    }

    /* The compression ratio is falling off so clear the table
     * and get a fresh start */
    ratio = 0;
    cl_hash ( (count_int) hsize, htab );
    free_ent = FIRST(block_compress);
    output_clear(&output_buf); /* send the clear code */
  }

  /*
   * Put out the final code.
   */
  output( ent, &output_buf );
  output_eof( &output_buf );
}

/*
 * cl_hash - reset the code table.
 *
 *   hsize:  number of entries to reset
 *   htab:   table to reset; must have room for at least hsize entries
 *
 * Stores -1 in each of the first hsize entries of htab; the hash probe
 * treats negative entries as unused slots.  Writes stay inside
 * htab[0 .. hsize-1] for every hsize, including sizes below 16, and a
 * size of zero or less writes nothing.
 */
static void
cl_hash(count_int hsize, /* reset code table */
        count_int *htab)
{
  const count_int empty = -1;
  count_int i;

  for (i = 0; i < hsize; i++) {
    htab[i] = empty;
  }
}
