#ifndef IFETCH_H_GUARD
#define IFETCH_H_GUARD

#include <stdio.h>
#include <inttypes.h>
#include <vector>
#include <algorithm>
#include "circuit.h"
#include "cache.h"
#include "packets.h"
#include "reg_data.h"
#include "globals.h"

#include "sparse_memory.h"

// Return Address Stack
// Repair mechanism: every branch carries with
// it the ra_stack_ptr and ra_stack_head state that existed at the
// time of prediction. If the branch turns out to mispredict, this
// state is repaired. This takes care of the following common case:
// call (correctly predicted)
// branch is mispredicted
// (on wrong path): return (bogus)
//                  call (this overwrites the stack head with bogus data)
// branch misspeculation feedback arrives, ra_stack head is repaired
// (on correct path): return (correctly pops head of ra_stack)

using namespace std;

// command line global variables
//
// Everything below is only declared here (extern); the definitions live in
// other translation units. Declarations are grouped by type.

// --- 32-bit unsigned values ---
extern uint32_t cache_size;
extern uint32_t memory_latency;
extern uint32_t L2_delay;
extern uint32_t iL1_lsize;
extern uint32_t iL1_assoc;
extern uint32_t iL1_lines;
extern uint32_t iL1_delay;
extern uint32_t num_contexts;
extern uint32_t ra_stack_size;
extern uint32_t fetch_bias;
extern uint32_t superpipeline_factor;
extern uint32_t debug_mask;      // ifetch stage debug_mask is 0x00000001

// --- boolean switches ---
extern bool selective_kill;
extern bool switch_fetch_every_cycle;
extern bool enable_ping;
extern bool aggressive_fetch;
extern bool use_oracle_bpred;

// --- 64-bit unsigned values ---
extern uint64_t cycle_count;
extern uint64_t num_fetched_instr;
extern uint64_t syscall_flushes;
extern uint64_t return_count;
extern uint64_t mispredicted_return_count;
extern uint64_t icache_hits;
extern uint64_t icache_accesses;
extern uint64_t il2_misses;
extern uint64_t il2_hits;
extern uint64_t num_rename_stall_cycles;
extern uint64_t num_scoreboard_stall_cycles;
extern uint64_t num_rob_stall_cycles_ns;
extern uint64_t num_lsq_stall_cycles_ns;

// Kept as a preprocessor macro so existing #if/#ifdef users keep working.
#define PING_FREQ 0x00010000

// Instruction fetch stage.
//
// Holds one program counter and one return address stack per context
// (both sized by num_contexts in the constructor), a branch target buffer,
// and handles to the instruction cache and L2 cache supplied by the caller.
// All member functions except the constructor and frand() are defined out
// of line.
//
// NOTE(review): the base class is inherited privately (the default for
// `class`); confirm that this is intended.
// NOTE(review): ra_stack is a raw new[] array allocated in the constructor;
// it is presumably released in ~ifetch() (defined elsewhere). No copy
// operations are declared, so a copy-constructed ifetch would alias that
// array -- confirm instances are never copied.
class ifetch : circuit
{
private:
  sparse_memory* the_mem;   // memory handle passed to the constructor (not allocated here)

  // address generator data structures
  cache<uint32_t>    target_buffer;
  vector<uint32_t>*  ra_stack;           // array of num_contexts stacks, each resized to ra_stack_size
  vector<uint32_t>  next_ra_stack_ptr;  // points to next available slot in ra stack (one entry per context)

  // cache of timestamps recording when a cacheline becomes ready
  // (the original comment said "ready to be ready"; presumably "ready to be read")
  cache<uint64_t>     & icache;
  cache<uint64_t>     *l2_cache;

  //added for use with oracle information gotten from arch_trace
  vector<uint64_t> start_instr_num;      // one entry per context
  vector<uint64_t> curr_instr_num;       // one entry per context
  uint32_t fetch_cycle;
  vector<uint64_t> cache_cycle_ready;    // one entry per context

  uint32_t next_pc;

  enum e_ifetch_mispred_type { t_if_branch_mispred, t_if_mem_mispred, 
                               t_if_syscall, t_if_simpanic, t_if_store_set_mispred};

  uint32_t unfetchable_occurance_count;
  uint32_t last_unfetchable_context;

  void account_unfetchable(uint32_t context);
  
  void handle_mispred(bool is_branch, bool is_cond, bool taken, bool is_call, bool is_return,
                      uint32_t ctext, uint32_t instr_pc, uint32_t correct_target,
                      uint32_t ras_ptr, uint32_t ras_head);
  
  int get_ras_hash(int ctext);

  // randomly generates a real number between 0.0 and 1.0 (both ends
  // inclusive, since rand() may return 0 or RAND_MAX)
  double frand() { return ((double)rand() / (double)RAND_MAX); }

  void print_ra_stack();

  void debug_ifetch();
  
  //updates fetch structures with branch info
  void receive_retiring_branch();

  bool access_icache(uint32_t x_pc, uint64_t * cycle_ready);

  void send_nop_on_icache_miss(uint64_t cycle_ready);

  void fetch_instruction(bool * is_return);

  uint32_t perform_branch_prediction(bool * is_return);

  void insert_nop_on_hit_branch();

  void insert_nop_on_stall(); //rob_stall/lsq_stall/badpath_stall

  void send_nop_on_mispredict();

  void handle_mispred_on_stall() ;

  void fix_instr_nums();

  void fix_state_on_mispredict();

  void fix_current_pc_on_mispredict();

  void repair_ras_on_mispredict(uint32_t x_next_ra_stack_ptr, uint32_t ra_stack_head, uint32_t c) ;

  void account_for_stalls();

  void init_state() ;

  void returns_stat_collection();
  
  bool flush_on_context(uint32_t context);

  void recalc();    

public:

  void print_schedule_pc_stats();

  void initialize_oracle_state();

  vector<uint32_t> pc;   // one program counter per context, each initialised to 99

  ~ifetch();

  // Builds the fetch stage.
  //   mem       - memory handle, stored as-is in the_mem
  //   an_icache - instruction cache, held by reference (must outlive this object)
  //   the_l2    - L2 cache pointer, stored as-is in l2_cache
  // Per-context containers are sized from the global num_contexts, and each
  // context's return address stack is sized from the global ra_stack_size.
  //
  // The initializer list follows member declaration order, which is the
  // order in which the members are actually constructed.
  ifetch(sparse_memory* mem, cache<uint64_t> &an_icache, cache<uint64_t> *the_l2) :
    circuit(),
    the_mem(mem),
    target_buffer(1024, 4, 2, 4),     // 4096 total entries, 4-way set associative, 4bytes per line
                                      // effectively, since 4 bytes per instruction
    next_ra_stack_ptr(num_contexts,  0),
    icache(an_icache),
    l2_cache(the_l2),

    start_instr_num(num_contexts, 0),
    curr_instr_num(num_contexts, 0),
    fetch_cycle(0),
    cache_cycle_ready(num_contexts, 0),
    next_pc(0),                       // previously left uninitialized
    unfetchable_occurance_count(0),
    last_unfetchable_context(0),

    pc(num_contexts, 99),
    misspec_penalty(0),
    hit_branch(false),
    branch_exec(),
    mem_access_exec(),                // value-initialized like the other ports
    scoreboard_stall(),
    rename_stall(),
    rob_stall(),
    lsq_stall(),                      // value-initialized like the other ports
    syscall_exec(),
    simpanic_in(),
    branch_ret(),                     // value-initialized like the other ports
    retiring_instr(), 

    program_counter(99),
    pc_context(0),
    prev_program_counter(0xdead2fec),
    prev_pc_context(0),
    prev_pc_prediction(0xdeaddeed),
    notdecoded_instr(0),
    ra_stack_ptr(0),
    ra_stack_head(0),
    num_fetched(vector<uint64_t>(num_contexts, 0)),
    btb_miss_out()
  {
    // SMT: one ra stack per context
    // Allocated in the body (after every member is constructed) so that a
    // throwing member initializer cannot leak the array.
    ra_stack = new vector<uint32_t>[num_contexts];
    for (uint32_t context=0; context<num_contexts; context++) {
      ra_stack[context].resize(ra_stack_size);
    }
  }

  // local var
  uint32_t misspec_penalty;
  statereg<uint8_t> hit_branch;

  // inputs:
  inport<branch_packet> branch_exec;     // branch misprediction info for completing instruction
  inport<mem_access_packet> mem_access_exec; // load/store info for completing instruction
  inport<uint8_t> scoreboard_stall;
  inport<uint8_t> rename_stall;
  inport<vector<uint8_t> > rob_stall;
  inport<vector<int> > lsq_stall;
  inport<syscall_exec_packet> syscall_exec;  //syscall execution packet from exec unit
  inport<simpanic_packet> simpanic_in;   //simpanic from rob
  inport<branch_packet> branch_ret; 
  inport<decoder::instr_h> retiring_instr; //retiring instruction from the rob.

  // outputs:
  statereg<uint32_t> program_counter;      // pc to be fetched this cycle
  statereg<uint32_t> pc_context;           // context of the current pc
  statereg<uint32_t> prev_program_counter; // pc fetched in the previous cycle
  statereg<uint32_t> prev_pc_context;      // context of the previous pc
  statereg<uint32_t> prev_pc_prediction;   // predicted pc for the prev context (for br pred)
  statereg<uint32_t> notdecoded_instr;     // instruction provided to decoder 

  statereg<uint32_t> ra_stack_ptr;
  statereg<uint32_t> ra_stack_head;
  statereg<vector<uint64_t> >num_fetched;  // initialised with one zeroed counter per context
  statereg<uint8_t> btb_miss_out;
};

#endif /* IFETCH_H_GUARD */
