see also: Huffman and Fibonacci

/*
        Illustrative example of minimum-redundancy code calculation.

        Program reads a file of symbol frequencies
            -- one per line
            -- ascending frequency
            -- first line indicates upper bound on how many 
                    frequencies appear;
        calculate the codeword lengths for a minimum-redundancy code;
        print out the codeword lengths;
        print out the average cost per symbol and the entropy.

        The method for calculating codelengths in function
        calculate_minimum_redundancy is described in 

        @inproceedings{mk95:wads,
                author = "A. Moffat and J. Katajainen",
                title = "In-place calculation of minimum-redundancy codes",
                booktitle = "Proc. Workshop on Algorithms and Data Structures",
                address = "Queen's University, Kingston, Ontario",
                publisher = "LNCS 955, Springer-Verlag",
                Month = aug,
                year = 1995,
                editor = "S.G. Akl and F. Dehne and J.-R. Sack",
                pages = "393-402",
        }

        The abstract of that paper may be fetched from 
        http://www.cs.mu.oz.au/~alistair/abstracts/wads95.html
        A revised version is currently being prepared.

        Written by
      Alistair Moffat, alistair@cs.mu.oz.au,
      Jyrki Katajainen, jyrki@diku.dk
   November 1996.
*/

#include <stdio.h>
#include <stdlib.h>
#include <math.h>

#define log2(x) (log10((double)(x))/log10(2.0))


void
calculate_minimum_redundancy(int *, int);

#define LIMIT 1000000                        /* maximum value for n */

int
main(int argc, char **argv) {

        int *A;                              /* the array of freqs */
        int *B;                              /* a copy of A */
        int n;                               /* number of symbols */
        int totfreq;                         /* total frequency */
        int bits;                            /* total bits in codewords */
        double ent;                          /* entropy */

        int i;

        /* claim space for arrays */
        scanf("%d", &n);
        if (n <= 0 || n > LIMIT) {
                fprintf(stderr, "%s: n should be at least 0 and less than %d\n",
                        argv[0], LIMIT);
                exit(1);
        }
        if ((A = (int *)malloc(n*sizeof(int))) == (int *)NULL
            || (B = (int *)malloc(n*sizeof(int))) == (int *)NULL) {
                    fprintf(stderr, "%s: unable to allocate memory\n",
                        argv[0]);
                exit(1);
        }

        /* read symbol frequencies */
        for (i=0; i<n; i++) {
                if (scanf("%d", A+i) == EOF) {
                        n = i;
                        break;
                }
        }

        /* calculate entropy, and check for sensible input values */
        totfreq = 0;
        for (i=0; i<n; i++) {
                totfreq += A[i];
                if (A[i] < 0 || (i>0 && A[i-1] > A[i])) {
                        fprintf(stderr, "%s: %s %s\n",
                                argv[0],
                                "input frequencies must be non-negative",
                                "and non-decreasing");
                        exit(1);
                }
        }
        if (totfreq <= 0) {
                fprintf(stderr, "%s: sum of frequencies must be positive\n",
                        argv[0]);
                exit(1);
        }
        ent = 0.0;
        for (i=0; i<n; i++) {
                double prob = (double)A[i]/totfreq;
                ent += - prob * log2(prob);
        }


        /* make a copy of A into B so that average weighted codeword
           length can be calculated once the codeword lengths are known;
           note that B is _not_ used by the length calculation process */
        for (i=0; i<n; i++) {
                B[i] = A[i];
        }

        /* now calculate the minimum-redundancy codeword lengths */
        calculate_minimum_redundancy(A, n);

        /* and evaluate average codeword length */
        bits = 0;
        for (i=0; i<n; i++) {
                bits += A[i] * B[i];
        }

        /*  and print the results */
   if (n <= 100) {
      for (i=0; i<n; i++) {
         printf("f_%02d = %4d, |c_%02d| = %2d\n",
            i, B[i], i, A[i]);
      }
   }
        printf("entropy                 = %5.2f bits per symbol\n", ent);
        printf("minimum-redundancy code = %5.2f bits per symbol\n", 
                (double)bits/totfreq);
   if (ent > 0.0)
      printf("inefficiency            = %5.2f%%\n", 
         100*(double)bits/totfreq/ent - 100.0);
        
   free(A);
   free(B);
   return(0);
}



/*** Function to calculate in-place a minimum-redundancy code
     Parameters:
        A        array of n symbol frequencies, non-decreasing
        n        number of symbols in A
*/
void
calculate_minimum_redundancy(int A[], int n) {
        int root;                  /* next root node to be used */
        int leaf;                  /* next leaf to be used */
        int next;                  /* next value to be assigned */
        int avbl;                  /* number of available nodes */
        int used;                  /* number of internal nodes */
        int dpth;                  /* current depth of leaves */

        /* check for pathological cases */
        if (n==0) { return; }
        if (n==1) { A[0] = 0; return; }

        /* first pass, left to right, setting parent pointers */
        A[0] += A[1]; root = 0; leaf = 2;
        for (next=1; next < n-1; next++) {
                /* select first item for a pairing */
                if (leaf>=n || A[root]<A[leaf]) {
                        A[next] = A[root]; A[root++] = next;
                } else
                        A[next] = A[leaf++];

                /* add on the second item */
                if (leaf>=n || (root<next && A[root]<A[leaf])) {
                        A[next] += A[root]; A[root++] = next;
                } else
                        A[next] += A[leaf++];
        }
        
        /* second pass, right to left, setting internal depths */
        A[n-2] = 0;
        for (next=n-3; next>=0; next--)
                A[next] = A[A[next]]+1;

        /* third pass, right to left, setting leaf depths */
        avbl = 1; used = dpth = 0; root = n-2; next = n-1;
        while (avbl>0) {
                while (root>=0 && A[root]==dpth) {
                        used++; root--;
                }
                while (avbl>used) {
                        A[next--] = dpth; avbl--;
                }
                avbl = 2*used; dpth++; used = 0;
        }
}

-- MattWalsh - 17 Mar 2004

Topic revision: r1 - 17 Mar 2004 - MattWalsh
 
This site is powered by the TWiki collaboration platformCopyright © 2008-2012 by the contributing authors. All material on this collaboration platform is the property of the contributing authors.
Ideas, requests, problems regarding TWiki? Send feedback