#ifndef IFETCH_H_GUARD
#define IFETCH_H_GUARD

#include <stdio.h>
#include <inttypes.h>
#include <vector>
#include "circuit.h"
#include "sparse_memory.h"
#include "btb.h"
#include "gshare.h"
#include "packets.h"

using namespace std;

// command line global variables
// (declared here and defined in another translation unit; none of them
// is referenced by the code in this header)
extern uint32_t cache_size;
extern uint32_t memory_latency;
extern uint32_t cache_line_size;
extern uint32_t num_cache_lines;
extern bool branch_pred_off;

// Return-instruction statistics.  ifetch::recalc() increments
// return_count when the retiring branch packet is flagged as a return,
// and mispredicted_return_count when that return is also flagged as
// mispredicted.
// NOTE(review): these are non-inline *definitions* in a header, so
// including this header from more than one translation unit yields
// duplicate-symbol link errors.  Presumably only one .cc includes it --
// confirm, or move the definitions into a .cc file and declare them
// extern here.
uint64_t return_count = 0;
uint64_t mispredicted_return_count = 0;

// Debugging global variables defined in pipelined-model.cc
// (bit 0 of debug_mask enables the IFETCH trace output in this header)
extern uint32_t debug_mask;

class ifetch : circuit
{
  void print_ra_stack() {
    printf("ptr: %d %d [", ra_stack_ptr(), next_ra_stack_ptr);
    for (int i = 0; i < RA_STACK_SIZE; i++) {
      printf("%08x ", ra_stack[i]);
    }
    printf("]\n");
  }
private:
  enum { RA_STACK_SIZE = 64 };
  sparse_memory *the_mem;
  btb target_buffer;
  gshare gshare_pred;

  statereg<uint32_t> mem_delay;

  // return address stack repair mechanism: every branch carries with
  // it the ra_stack_ptr and ra_stack_head state that existed at the
  // time of prediction.  If the branch turns out to mispredict, this
  // state is repaired.  This takes care of the following common case:
  // call (correctly predicted)
  // branch is mispredicted
  // (on wrong path): return (bogus)
  //                  call (this overwrites the stack head with bogus data)
  // branch mispeculation feedback arrives, ra_stack head is repaired
  // (on correct path): return (correctly pops head of ra_stack)
  vector<uint32_t> ra_stack;   // return address stack
  uint32_t next_ra_stack_ptr;

  void recalc() {
    uint32_t next_pc;
    bool icache_hit = false;

    if (debug_mask & 0x00001) {
      branch_info().print();
    }

    if (branch_info().is_branch) {
      gshare_pred.update(branch_info().instr_pc, branch_info().global_history >> 1, branch_info().taken);
    }

    if (branch_info().is_branch && branch_info().is_return) {
      return_count++;
      if (branch_info().mispredict) mispredicted_return_count++;
    }

    if (branch_info().is_branch && branch_info().mispredict) {
      // MISPREDICT!!!!
      if (debug_mask & 0x1) {
        printf("IFETCH: saw mispredict\n");
        fflush(stdout);
      }
      // fix up program_counter:
      prev_program_counter = 0xdead2fec;
      program_counter = branch_info().correct_target;
      notdecoded_instr = 0; // noop
      mem_stall = false;
      mem_delay = 0;
      if (branch_info().taken) {
        uint32_t value = branch_info().correct_target;
        uint32_t bits = branch_info().is_call;
        bits = (bits << 1) | branch_info().is_return;
        target_buffer.update_value(branch_info().instr_pc, value | bits);
      }
      // fix up global history by changing lsb:
      global_history = ((branch_info().global_history & ~(0x1)) | branch_info().taken);
      next_ra_stack_ptr = branch_info().ra_stack_ptr;
      ra_stack[(next_ra_stack_ptr + (RA_STACK_SIZE - 1)) % RA_STACK_SIZE] = branch_info().ra_stack_head;
      ra_stack_ptr = next_ra_stack_ptr;
      ra_stack_head = branch_info().ra_stack_head;
      if (branch_info().is_call) {
        // we should have pushed the ra stack
        ra_stack[next_ra_stack_ptr] = branch_info().instr_pc + 4;
        next_ra_stack_ptr = (next_ra_stack_ptr + 1) % RA_STACK_SIZE;
      }
      else if (branch_info().is_return) {
        // we should have popped the ra stack
        next_ra_stack_ptr = (next_ra_stack_ptr + (RA_STACK_SIZE - 1)) % RA_STACK_SIZE;
      }
    }
    else {
      // no mispredict
      // check external stall condtions
      if (!(scoreboard_stall() || rename_stall() || rob_stall())) {
        ra_stack_ptr = next_ra_stack_ptr;
        ra_stack_head = ra_stack[(next_ra_stack_ptr + (RA_STACK_SIZE - 1)) % RA_STACK_SIZE];
        int32_t buffer_way = target_buffer.check(program_counter());
        if (buffer_way >= 0) {
          uint32_t btb_val = target_buffer.get_value(program_counter(), buffer_way);
          if (btb_val & 0x1) {
            // it's a return: get target of return address stack
            next_ra_stack_ptr = (next_ra_stack_ptr + (RA_STACK_SIZE - 1)) % RA_STACK_SIZE;
            next_pc = ra_stack[next_ra_stack_ptr];
          }
          else {
            if (gshare_pred.predict(program_counter(), global_history())) {
              next_pc = btb_val & ~0x3;
              global_history = (global_history() << 1) | 1;
            }
            else {
              next_pc = program_counter() + 4;
              global_history = (global_history() << 1) | 0;
            }

          }
          if (btb_val & 0x2) {
            // it's a call: push the return address stack
            ra_stack[next_ra_stack_ptr] = program_counter() + 4;
            next_ra_stack_ptr = (next_ra_stack_ptr + 1) % RA_STACK_SIZE;
          }
        }
        else {
          next_pc = program_counter() + 4;
        }
        notdecoded_instr = the_mem->load_uint32(program_counter());
        prev_program_counter = program_counter();
        program_counter = next_pc;
      }
      // else: don't modify state: just stall
    }

    if(debug_mask & 0x00000001) {
      printf("IFETCH> pc %x prev_pc %x instr %x brp %x cachehit %d memstall %d stalldelay %d\n",
             program_counter(), prev_program_counter(),
             notdecoded_instr(), branch_prediction(), icache_hit, mem_stall(), mem_delay());
      printf("\t\t\t\n %d %d %d\n",
             scoreboard_stall(), rename_stall(), rob_stall());
    } 
  }

public:

  ifetch(sparse_memory *mem) :
    circuit(),
    the_mem(mem),
    target_buffer(1024, 4),     // 4096 total entries, 4-way set associative
    gshare_pred(13, 8),         // 8192 entry table: 8 bits of global history
    mem_delay(0),

    ra_stack(RA_STACK_SIZE),
    next_ra_stack_ptr(0),

    branch_info(),
    scoreboard_stall(),
    rename_stall(),
    rob_stall(),

    program_counter(99), 
    prev_program_counter(0xdead2fec),
    notdecoded_instr(0),
    mem_stall(false),
    branch_prediction(false),
    local_prediction(false),
    global_prediction(false),
    global_history(0x11),
    ra_stack_ptr(0),
    ra_stack_head(0)
    { }

  // inputs:
  inport<branch_packet> branch_info; // retiring branch from reorder buffer
  inport<bool> scoreboard_stall;          // scoreboard stalls fetch
  /*  inport<cacheline_t> instr_mem_line;*/
  inport<bool> rename_stall;
  inport<bool> rob_stall;

  // outputs:
  statereg<uint32_t> program_counter; 
  statereg<uint32_t> prev_program_counter;
  statereg<uint32_t> notdecoded_instr;
  statereg<bool> mem_stall;
  statereg<bool> branch_prediction;
  statereg<bool> local_prediction;
  statereg<bool> global_prediction;
  statereg<uint32_t> global_history;
  statereg<uint32_t> ra_stack_ptr;
  statereg<uint32_t> ra_stack_head;
};

#endif /* IFETCH_H_GUARD */
