#include <errno.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <fcntl.h>
#include <unistd.h>

#include <sys/ioctl.h>


/*
 * The interesting stuff in this file is voladj_*
 * The main function gives an indication of how to use
 * the functions.  Basically, call voladj_init with the arguments
 * you want, and then pass the address of the returned structure
 * into all subsequent calls.
 *
 * To look ahead, call voladj_check() on the future buffer, and then
 * voladj_scale on the current buffer.
 *
 * To not look ahead, call voladj_check() on the current buffer, and
 * then voladj_scale on the current buffer.
 *
 * If we decide that the look ahead doesn't improve the sound at all,
 * then we can pretty much remove voladj_check, and it becomes
 * even more trivial.
 */ 

/* Size in bytes of one processing block; main() reads and writes the
 * audio stream in blocks of this size. */
#define AUDIO_FILLSZ   4608
#define BLOCKSIZE   AUDIO_FILLSZ

/* Largest sample magnitude an output value may have before it is
 * reported as clipping. */
#define MAXSAMPLES 32767

/* Number of fractional bits in the (int) volume multipliers;
 * 1 << MULT_POINT is a multiplier of 1.0. */
#define MULT_POINT 12
/* Number of fractional bits in the (short) fixed point fields of
 * struct voladj_state; SHORT_FRAC_VAL is 1.0 in that format. */
#define SHORT_FRAC_POINT MULT_POINT
#define SHORT_FRAC_VAL ((double)(1 << SHORT_FRAC_POINT))
/* Integer bits allowed in a multiplier, chosen so that
 * MULT_POINT + SHORT_FRAC_POINT + MULT_INTBITS == 30.  With the values
 * above this evaluates to 6. */
#define MULT_INTBITS (30 - (MULT_POINT + SHORT_FRAC_POINT))

/* Number of low bits shifted out of a multiplier before it is multiplied
 * by a sample, leaving at most 16 significant bits.  With the values
 * above this evaluates to 2. */
#define MULT_TO_16 (MULT_POINT + MULT_INTBITS - 16)
/* Right shift applied after multiplying a multiplier by a short fixed
 * point value, to get back to MULT_POINT fractional bits. */
#define RESULT_SHIFT MULT_POINT

/*
 * State carried between calls to voladj_check() and voladj_scale().
 * It is created by voladj_init().  The int multipliers are fixed point
 * with MULT_POINT fractional bits; the short factors are fixed point with
 * SHORT_FRAC_POINT fractional bits.
 */
struct voladj_state {
  int output_multiplier;  /* multiplier reached at the end of the last voladj_scale() */
  int desired_multiplier; /* target chosen by voladj_check(); voladj_scale() ramps to it */
  int buf_size;           /* block size in bytes; buf_size / 2 samples per block */
  short headroom;         /* headroom factor applied to the desired multiplier */
  short minvol;           /* minimum volume to aim for, as a fraction of full scale */
  short increase;         /* per-block factor bounding how far the multiplier may rise */
  short decrease;         /* per-block factor bounding how far the multiplier may fall */
  short max_sample;       /* peak magnitude of the buffer last given to voladj_check() */
};


/*
 * Convert a floating point factor into the short fixed point format used
 * by struct voladj_state (SHORT_FRAC_POINT fractional bits).
 *
 * The factor is clamped to [0, limit] before conversion and the scaled
 * value is clamped to SHRT_MAX, so the conversion to short can never be
 * out of range.  A NaN factor fails the first comparison and yields 0.
 */
static short voladj_to_fixed( double factor, double limit ) {
  double scaled;

  if (!(factor > 0.0)) {
    return 0;
  }
  if (factor > limit) {
    factor = limit;
  }

  scaled = factor * SHORT_FRAC_VAL;
  if (scaled > (double)SHRT_MAX) {
    scaled = (double)SHRT_MAX;
  }
  return (short)scaled;
}

/*
 * Initialise all our stuff.  One of the main things this does is
 * convert easy to read floats into fixed point.
 *
 * Returns a state with both multipliers set to 1.0 and max_sample set
 * to 0.  minvol and headroom are clamped to the range 0 - 1.  The
 * per-block increase and decrease factors are derived from db_per_second
 * and are limited to what a short can represent (just under 8.0).
 *
 * As a side effect the resulting fixed point values are printed to
 * stderr.
 */

struct voladj_state voladj_init( 

  int buf_size,         /* (Fixed) size of blocks in bytes to process */
  double db_per_second, /* Maximum rate of volume change, in dB/sec */
  double minvol,        /* Minimum volume to attempt to maintain (0 - 1) */
  double headroom       /* Headroom multiplier */
  ) {
  struct voladj_state initial;

  /* Largest factor that still fits in a short after scaling. */
  const double max_short_factor = (double)SHRT_MAX / SHORT_FRAC_VAL;

  double factor_per_second, increase_factor, decrease_factor;

  initial.output_multiplier = 0x1 << MULT_POINT;
  initial.desired_multiplier = initial.output_multiplier;
  initial.buf_size = buf_size;

  /*
   * Turn the rate per second into a rate per block.  The block duration
   * is taken to be buf_size / (4 * 44100) seconds, i.e. 4 bytes per
   * sample frame at 44100 frames per second.
   *
   * NOTE(review): dividing by 10.0 treats db_per_second as a power
   * ratio; for an amplitude multiplier one would expect 20.0.  Left
   * unchanged - confirm the intent.
   */
  factor_per_second = pow( 10.0 , db_per_second / 10.0 );
  increase_factor = pow( factor_per_second, (buf_size / (4.0 * 44100.0)));
  decrease_factor = 1.0 / increase_factor;

  initial.increase = voladj_to_fixed( increase_factor, max_short_factor );
  initial.decrease = voladj_to_fixed( decrease_factor, max_short_factor );

  initial.minvol = voladj_to_fixed( minvol, 1.0 );
  initial.headroom = voladj_to_fixed( headroom, 1.0 );

  initial.max_sample = 0;

  fprintf(stderr, 
      "minvol: %x headroom: %x increase: %x decrease %x\n", 
      initial.minvol, initial.headroom, initial.increase, initial.decrease);
  fprintf(stderr, 
      "MULT_TO_16: %d MULT_POINT: %d MULT_INTBITS: %d SHORT_FRAC_VAL: %f \n", 
      MULT_TO_16, MULT_POINT, MULT_INTBITS, SHORT_FRAC_VAL);

  return initial;

}

/*
 * Here we figure out what multiplier we want, based on the minimum
 * volume that we are aiming for, and the maximum sample that we
 * can see.
 *
 * state      - supplies minvol, headroom and the current output_multiplier
 * max_sample - peak sample magnitude to plan for; values <= 0 are
 *              treated as 1 so the division below is always valid
 *
 * Returns a multiplier with MULT_POINT fractional bits.  The result is
 * never below 1.0 (1 << MULT_POINT).  If the computed value exceeds
 * 1 << (MULT_POINT + MULT_INTBITS) a message is printed to stderr and
 * the current output multiplier is used instead.
 */

int voladj_get_multiplier( struct voladj_state *state, short max_sample ) {
  const int unity_mult = 1 << MULT_POINT;
  const int largest_mult = 1 << (MULT_POINT + MULT_INTBITS);
  int desired_out, desired_mult;

  if (max_sample <= 0) max_sample = 1;

  /*
   * Target output level for the peak: a blend between full scale
   * (weighted by minvol) and the peak itself (weighted by 1 - minvol),
   * still carrying SHORT_FRAC_POINT fractional bits.
   */
  desired_out = ((int)(state->minvol) * (int)MAXSAMPLES)  + 
      (((int)max_sample) * ( (1 << SHORT_FRAC_POINT) - (int)(state->minvol) )) ;
  desired_mult = desired_out  / max_sample ;

  /*
   * Apply the headroom.  For quiet buffers desired_mult can be as large
   * as desired_out itself, so the product does not fit in a 32 bit int;
   * do the multiplication in long long.  After the shift the value is
   * back within int range.
   */
  desired_mult = (int)(((long long)desired_mult * (long long)state->headroom)
      >> RESULT_SHIFT);

  if (desired_mult > largest_mult) {
    fprintf(stderr, 
      "Desired mult %d, %x too big, from sample %d, desired output %d\n", 
      desired_mult, (unsigned int)desired_mult, max_sample, desired_out);
    desired_mult = state->output_multiplier;
  }
  if (desired_mult < unity_mult) {
    desired_mult = unity_mult;
  }
  return desired_mult;
}


/* 
 * This bit of code searches for any samples that will cause clipping
 * in the future.  The reason we read ahead is so that we never
 * have to do abrupt volume changes.  I'm not so sure that this is
 * necessary, because even gradual volume changes over 4k of data
 * are pretty abrupt to the ear.  Still, I have this nagging feeling
 * that suddenly changing the scaling factor from 100 to 1 would
 * produce some high frequency components.
 *
 * state        - updated in place: desired_multiplier receives the new
 *                target and max_sample the peak of lookaheadbuf
 * lookaheadbuf - state->buf_size / 2 samples to inspect (not modified)
 *
 * The new target is limited to one increase/decrease step away from the
 * current output multiplier, unless the current multiplier would clip
 * the look ahead buffer; in that case the target is taken as is and a
 * diagnostic is printed to stderr.
 */

void voladj_check( 
  struct voladj_state *state, 
  short *lookaheadbuf ) {

  int outmult;
  int desired_multiplier;
  int upmult;
  int downmult;
  short max_sample;
  short max_la_sample;

  int quickadjust = 0;
  int num_samples = state->buf_size / 2;
  int i, outputvalue;
  int magnitude;
  int peak = 0;

  /*
   * Find the peak magnitude.  The magnitude is computed as an int and
   * capped at MAXSAMPLES because abs(-32768) does not fit in a short;
   * stored in a short it would turn negative and the loudest possible
   * sample would be ignored.
   */
  for( i = 0; i < num_samples; i++ ) {
    magnitude = abs( (int)lookaheadbuf[ i ] );
    if (magnitude > MAXSAMPLES) {
      magnitude = MAXSAMPLES;
    }
    if (magnitude > peak) {
      peak = magnitude;
    }
  }
  max_la_sample = (short)peak;

  /* Would the multiplier currently in use clip the look ahead buffer? */
  outmult = state->output_multiplier;
  outputvalue = ( ( (outmult >> MULT_TO_16 ) * 
    max_la_sample ) >>  (MULT_POINT - MULT_TO_16 ) );

  if (outputvalue > MAXSAMPLES ) {
    quickadjust = 1;
  }

  /* Plan for the louder of the look ahead peak and the previous peak. */
  max_sample = max_la_sample;
  if (state->max_sample > max_sample) {
    max_sample = state->max_sample;
  }
  
  desired_multiplier = voladj_get_multiplier( state, max_sample );

  upmult = (outmult * state->increase) >> RESULT_SHIFT;
  downmult = (outmult * state->decrease) >> RESULT_SHIFT;

  if (quickadjust > 0 ) {
    fprintf(stderr, 
      "quick: %d outmult: %x desmult: %x upmult: %x downmult: %x\n", 
      quickadjust, outmult, desired_multiplier, upmult, downmult);
  }

  /* Rate limit the change, except when clipping has to be avoided. */
  if (! quickadjust) {
    if (desired_multiplier > upmult) {
      desired_multiplier = upmult;
    }
    if (desired_multiplier < downmult) {
      desired_multiplier = downmult;
    }
  }

  state->desired_multiplier = desired_multiplier;
  state->max_sample = max_la_sample;
}

/*
 * Scale one block of samples in place.
 *
 * state    - output_multiplier is the starting multiplier and
 *            desired_multiplier the one to reach; on return
 *            output_multiplier holds the multiplier used for the last
 *            sample
 * scalebuf - state->buf_size / 2 samples, overwritten with the result
 *
 * The multiplier is moved from its starting value to the desired value
 * in steps spread over the block, so that the desired value is reached
 * exactly on the last sample.
 */
void voladj_scale( 
  struct voladj_state *state, 
  short *scalebuf 
  ) {

  int outmult = state->output_multiplier;
  int desired_multiplier = state->desired_multiplier;
  int output_value,i;
  int num_samples = state->buf_size / 2;

  /*
  ** The previous call to voladj_check is supposed to have picked a
  ** multiplier that will not cause clipping for any of the samples.
  ** Should a sample go out of range anyway, it is reported and
  ** saturated at the 16 bit limits; simply truncating it to 16 bits
  ** would wrap it around to the opposite sign.
  */

  for( i = 0; i < num_samples; i++ ) {
    if (desired_multiplier != outmult) {
      /* Cover an equal share of the remaining distance on each sample. */
      outmult = outmult + 
        ((desired_multiplier - outmult) / (num_samples - i));
    } 
    output_value = ((outmult >> MULT_TO_16) 
      * scalebuf[ i ]) >> (MULT_POINT - MULT_TO_16 ) ;
    if (output_value > MAXSAMPLES || output_value < -MAXSAMPLES - 1) {
      fprintf(stderr, "CLIPPING! in: %d, out: %d, mult: %x\n", 
          scalebuf[ i ], output_value, outmult);
      output_value = (output_value > 0) ? MAXSAMPLES : -MAXSAMPLES - 1;
    }
    scalebuf[ i ] = (short)output_value;
  }

  state->output_multiplier = outmult;
}



/*
 * Example driver: reads audio from stdin in blocks of BLOCKSIZE bytes,
 * runs it through voladj_check()/voladj_scale() and writes the result to
 * stdout.  With lookahead enabled the output lags the input by one
 * block, which is flushed after the end of the input.
 *
 * Returns 0 on success and 1 if reading or writing fails.
 */
int main(int argc, char *argv[])
{
  /*
   * The buffers are arrays of short so that they are correctly aligned
   * and typed for the sample access done by the voladj functions.
   * Static storage starts out zeroed, so no explicit clearing is needed.
   */
  static short buffer1[BLOCKSIZE / 2];
  static short buffer2[BLOCKSIZE / 2];
  short *actualbuff, *lookaheadbuff, *tempbuf;
  int lookahead;
  size_t len, lastlen, bytes_read;
  struct voladj_state state;

  (void)argc;
  (void)argv;

  state = voladj_init( 
    BLOCKSIZE,      /* (Fixed) size of blocks in bytes to process */
    3.0,            /* Maximum rate of volume change, in dB/sec */
    0.1,            /* Minimum volume to attempt to maintain (0 - 1) */
    0.9             /* Headroom multiplier */
    );

  lookahead = 1;

  actualbuff = buffer1;
  lookaheadbuff = buffer2;
  lastlen = 0;

  /* Input is consumed in 4 byte items; len counts the items read. */
  while ((len = fread(lookaheadbuff, 4, BLOCKSIZE / 4, stdin)) > 0) {

    /*
     * A short read leaves old samples from an earlier block in the rest
     * of the buffer.  Clear them so they do not influence the peak
     * search or get scaled a second time.
     */
    bytes_read = len * 4;
    if (bytes_read < BLOCKSIZE) {
      memset( (char *)lookaheadbuff + bytes_read, 0, BLOCKSIZE - bytes_read );
    }

    if (lookahead) {
      voladj_check( &state, lookaheadbuff );
    } else {
      /* No look ahead: check and scale the same block, without a ramp. */
      actualbuff = lookaheadbuff;
      voladj_check( &state, actualbuff );
      state.output_multiplier = state.desired_multiplier;
      lastlen = len;
    } 
    voladj_scale( &state, actualbuff );

    if (fwrite( actualbuff, 4, lastlen, stdout ) != lastlen) {
      perror("fwrite");
      return 1;
    }

    /* The block just read becomes the next one to be scaled. */
    tempbuf = actualbuff;
    actualbuff = lookaheadbuff;
    lookaheadbuff = tempbuf;
    lastlen = len;
  }

  if ( lookahead) {
    /* Flush the block that was still waiting for its look ahead. */
    voladj_scale( &state, actualbuff );
    if (fwrite( actualbuff, 4, lastlen, stdout ) != lastlen) {
      perror("fwrite");
      return 1;
    }
  }

  if (ferror(stdin)) {
    perror("fread");
    return 1;
  }

  return 0;
}

