#include "ifetch.h"


// Recovers the front end after the back end reports a mispredicted branch.
//
// The fetch pc of context `ctext` is redirected to `correct_target`, the
// return address stack (RAS) of that context is rolled back to the checkpoint
// that travelled with the branch, the RAS effect the branch itself should
// have had is replayed, and taken branches are (re)installed in the BTB.
//
//   is_branch      - not consulted by this routine
//   is_cond        - conditional branch; only influences the BTB type bits
//   taken          - only taken branches are written to the BTB
//   is_call        - replay a push of instr_pc + 4
//   is_return      - replay a pop
//   ctext          - context whose pc and RAS are repaired
//   instr_pc       - pc of the mispredicted branch (BTB lookup address)
//   correct_target - resolved target of the branch
//   ras_ptr        - checkpointed value of next_ra_stack_ptr
//   ras_head       - checkpointed value of the entry just below ras_ptr
void ifetch::handle_mispred(bool is_branch, bool is_cond, bool taken, bool is_call, bool is_return,
                            uint32_t ctext, uint32_t instr_pc, uint32_t correct_target,
                            uint32_t ras_ptr, uint32_t ras_head) {

  // redirect fetch for the offending context
  pc[ctext] = correct_target;

  // roll the RAS back: restore the pointer first, then rewrite the slot
  // directly below it (the top of stack) with the checkpointed value
  next_ra_stack_ptr[ctext] = ras_ptr;
  ra_stack[ctext][(next_ra_stack_ptr[ctext] + (ra_stack_size - 1)) % ra_stack_size] = ras_head;

  // (is_cond: direction-predictor repair would go here; nothing to do today)

  // replay the RAS side effect of the branch itself
  if (is_call) {
    // a call pushes its fall-through address
    ra_stack[ctext][next_ra_stack_ptr[ctext]] = instr_pc + 4;
    next_ra_stack_ptr[ctext] = (next_ra_stack_ptr[ctext] + 1) % ra_stack_size;
  }
  else if (is_return) {
    // a return pops one entry (pointer moves back by one, modulo the size)
    next_ra_stack_ptr[ctext] = (next_ra_stack_ptr[ctext] + (ra_stack_size - 1)) % ra_stack_size;
  }

  // not-taken branches leave the BTB untouched
  if (!taken) {
    return;
  }

  // the two low bits of a BTB entry encode the kind of control transfer:
  //   0x2 call, 0x1 return, 0x0 conditional, 0x3 unconditional
  uint32_t kind_bits;
  if (is_call) {
    kind_bits = 0x2;
  }
  else if (is_return) {
    kind_bits = 0x1;
  }
  else if (is_cond) {
    kind_bits = 0x0;
  }
  else {
    kind_bits = 0x3;
  }
  target_buffer.update_value(instr_pc, correct_target | kind_bits);
}
  
// Folds the five RAS entries directly below the stack pointer of context
// `ctext` into one integer: from the entry `depth` slots below the pointer,
// address bits [7:2] are placed at bit offset 6 * depth, so the entry
// nearest the pointer ends up in the low six bits.
int ifetch::get_ras_hash(int ctext) {
  int hash = 0;
  const int top = next_ra_stack_ptr[ctext];

  int depth = 0;
  while (depth < 5) {
    // slot `depth + 1` positions below the pointer, wrapping around
    const auto slot  = (top + ra_stack_size - 1 - depth) % ra_stack_size;
    // six address bits, skipping the two low (word-offset) bits
    const auto field = (ra_stack[ctext][slot] >> 2) & 0x3F;
    hash |= field << (depth * 6);
    ++depth;
  }

  return hash;
}


// Prints, one line per context, the current RAS pointer followed by every
// RAS entry in storage order (index 0 first), each as 8 hex digits.
void ifetch::print_ra_stack() {
  uint32_t ctx = 0;
  while (ctx < num_contexts) {
    printf("context %u - ra stack ptr: %d [", ctx, next_ra_stack_ptr[ctx]);

    for (uint32_t slot = 0; slot < ra_stack_size; ++slot) {
      printf("%08x ", ra_stack[ctx][slot]);
    }

    printf("]\n");
    ++ctx;
  }
}

// Prints a snapshot of the fetch stage inputs and latches to stdout.
// Does nothing unless bit 0 of debug_mask is set.
//
// Output, in order: the scheduled context/pc and the contents of the output
// latch, the stall inputs (per-context for rob/lsq), and then one report per
// back-end event source (branch, syscall, simpanic, memory misspeculation).
// Each source's print() is invoked unconditionally; the "saw ..." prefix is
// only emitted when that source is flagged this cycle.
void ifetch::debug_ifetch() {

  if (!(debug_mask & 0x0001))
    return;

  printf("IFETCH> ctext %d ctext_pc %x real_pc %x prev_ctext %d prev_ctext_pc %x prev_pc_nextpc %x prev_instr %x\n",
         pc_context(), pc[pc_context()], program_counter(), prev_pc_context(), prev_program_counter(), prev_pc_prediction(), notdecoded_instr());
  printf("\t\t\tscoreboard stall: %d, rename stall: %d, misspec_penalty: %u, rob_stall (context, stall): ",
         scoreboard_stall(), rename_stall(), misspec_penalty);
  for(uint32_t c=0; c < num_contexts; ++c) {
    printf("%x-%d ", c, rob_stall()[c]);
  }
  printf(", lsq_stall: ");
  for(uint32_t c=0; c < num_contexts; ++c) {
    printf("%x-%d ", c, lsq_stall()[c]);
  }
  printf("\n");

  if (branch_exec().mispredict)
    printf("IFETCH> saw back-end misprediction -- ");
  branch_exec().print();

  if (syscall_exec().valid)
    printf("IFETCH> saw flush due to syscall -- ");
  syscall_exec().print();

  // simpanic is reported exactly once (an identical second report used to
  // follow the memory-misspeculation report)
  if (simpanic_in().valid)
    printf("IFETCH> saw simpanic -- ");
  simpanic_in().print();

  if (mem_access_exec().misspeculation)
    printf("IFETCH> saw misspeculation -- ");
  mem_access_exec().print();
}
  

// Statistics: counts executed returns reported on branch_exec() and, of
// those, the ones flagged as mispredicted.
inline void ifetch::returns_stat_collection()
{
  const bool executed_return = branch_exec().is_branch && branch_exec().is_return;
  if (!executed_return) {
    return;
  }

  return_count++;
  if (branch_exec().mispredict) {
    mispredicted_return_count++;
  }
}

// Returns true when any back-end event reported this cycle requires the
// instruction stream of `context` to be flushed: a simpanic, a branch
// misprediction or a memory misspeculation attributed to that context, or a
// valid syscall issued by it.
//
// The `context` argument is what gets tested (previously it was ignored and
// pc_context() was consulted instead, which only coincided with the argument
// because the caller passes pc_context()).
inline bool ifetch::flush_on_context(uint32_t context)
{
  return simpanic_in().mispredicted_context(context)                    ||
         branch_exec().mispredicted_context(context)                    ||
         (syscall_exec().valid && syscall_exec().context == context)    ||
         mem_access_exec().mispredicted_context(context);
}


// Start-of-evaluation housekeeping: clears the pending next_pc, counts the
// misspeculation penalty down by one while it is non-zero, and clears
// hit_branch on the last sub-cycle of a superpipelined cycle.
inline void ifetch::init_state() 
{
  // no prediction has been made yet in this evaluation
  next_pc = 0;

  if (misspec_penalty > 0) {
    --misspec_penalty;
  }

  // last sub-cycle of the current "cycle": allow fetching past a branch
  // again from the next one on
  const bool last_subcycle =
      (cycle_count % superpipeline_factor) == (superpipeline_factor - 1);
  if (last_subcycle) {
    hit_branch = false;
  }
}

// Statistics: adds this cycle's stall inputs to the running stall-cycle
// counters. The rob/lsq counters sample the stall of the context that is
// currently scheduled to fetch.
inline void ifetch::account_for_stalls()
{
  const auto fetch_ctx = pc_context();

  // per-context stalls of the scheduled context
  num_rob_stall_cycles_ns += rob_stall()[fetch_ctx];
  num_lsq_stall_cycles_ns += lsq_stall()[fetch_ctx];

  // front-end wide stalls
  num_rename_stall_cycles     += rename_stall();
  num_scoreboard_stall_cycles += scoreboard_stall();
}


// If the context scheduled to fetch is hit by a back-end event this cycle,
// overwrites program_counter with the recovery address of that event.
// Priority when several are flagged: simpanic, memory misspeculation,
// branch misprediction, syscall (which resumes at the syscall's pc + 4).
inline void ifetch::fix_current_pc_on_mispredict()
{
  const auto fetch_ctx = pc_context();

  if (simpanic_in().mispredicted_context(fetch_ctx)) {
    program_counter = simpanic_in().next_pc;
    return;
  }

  if (mem_access_exec().mispredicted_context(fetch_ctx)) {
    program_counter = mem_access_exec().recovery_pc;
    return;
  }

  if (branch_exec().mispredicted_context(fetch_ctx)) {
    program_counter = branch_exec().correct_target;
    return;
  }

  if (syscall_exec().valid && fetch_ctx == syscall_exec().context) {
    program_counter = syscall_exec().program_counter + 4;
  }
}

// Rolls the return address stack of context `c` back to a checkpoint:
// the stack pointer becomes `x_next_ra_stack_ptr` and the entry directly
// below it (with wrap-around) is overwritten with `ra_stack_head`.
inline void ifetch::repair_ras_on_mispredict(uint32_t x_next_ra_stack_ptr, uint32_t ra_stack_head, uint32_t c) 
{
  next_ra_stack_ptr[c] = x_next_ra_stack_ptr;

  // index of the top-of-stack slot, i.e. one below the restored pointer
  const auto top_slot = (next_ra_stack_ptr[c] + (ra_stack_size - 1)) % ra_stack_size;
  ra_stack[c][top_slot] = ra_stack_head;
}



inline void ifetch::fix_state_on_mispredict()
{
  //note: simpanic takes precedence over everything else.
  //don't need to check if the instruction was a call/ret: that will be fixed anyway when the misprediction is handled on instruction execution
  if(simpanic_in().valid) {
    uint32_t c  = simpanic_in().context;
    pc[c] = simpanic_in().next_pc;
    if(debug_mask & 0x1)
      printf("setting PC[context] correctly on SIMPANIC!\n\n");

    repair_ras_on_mispredict(simpanic_in().ra_stack_ptr, simpanic_in().ra_stack_head, c);
  }
  // note: mem_access_exec()/branch_exec()/syscall_exec() refer to the same instruction and should
  // be mutually exclusive
  else if (mem_access_exec().misspeculation) {
    uint32_t c  = mem_access_exec().recovery_context;
    pc[c] = mem_access_exec().recovery_pc;

    repair_ras_on_mispredict(branch_exec().ra_stack_ptr, branch_exec().ra_stack_head, c);

    assert(!branch_exec().is_branch);
    assert(!branch_exec().is_call);
    assert(!branch_exec().taken);
    assert(!branch_exec().is_return);
  }
  else if (branch_exec().mispredict) {
    handle_mispred(branch_exec().is_branch, branch_exec().is_cond, 
                   branch_exec().taken, branch_exec().is_call, branch_exec().is_return,
                   branch_exec().context, branch_exec().instr_pc, branch_exec().correct_target,
                   branch_exec().ra_stack_ptr, branch_exec().ra_stack_head);
  }
  else if (syscall_exec().valid) {
    syscall_flushes ++;
    uint32_t c  = syscall_exec().context;
    pc[c] = syscall_exec().program_counter + 4;
    if(debug_mask & 0x1)
      printf("setting PC[context] correctly on syscall in cycle %llu!\n\n", cycle_count);

    repair_ras_on_mispredict(syscall_exec().ra_stack_ptr, syscall_exec().ra_stack_head, c);
  }
}

// Called while the front end is stalled. If a back-end event this cycle
// targets the context whose instruction currently sits in the output latch,
// that instruction is squashed: the latch is turned into a noop with a
// poison pc and zeroed RAS checkpoint outputs. prev_pc_context is left
// as it is.
inline void ifetch::handle_mispred_on_stall() 
{
  const bool squash_latched =
      simpanic_in().mispredicted_context(prev_pc_context())     ||
      branch_exec().mispredicted_context(prev_pc_context())     ||
      mem_access_exec().mispredicted_context(prev_pc_context()) ||
      (syscall_exec().valid && syscall_exec().context == prev_pc_context());

  if (!squash_latched) {
    return;
  }

  // replace the latched instruction by a noop
  notdecoded_instr = 0;

  // recognisable marker pc for squashed slots
  prev_program_counter = 0xdead2fec;

  // a noop carries no RAS checkpoint
  ra_stack_ptr = 0;
  ra_stack_head = 0;
}

// Emits a noop on the output latch for the context scheduled to fetch,
// used when that context is being flushed this cycle. The latch gets a
// poison pc, the current fetch context, and zeroed RAS checkpoint outputs.
inline void ifetch::send_nop_on_mispredict()
{
  // noop encoding and marker pc
  notdecoded_instr = 0;
  prev_program_counter = 0xdead2fec;

  // attribute the bubble to the context that was about to fetch
  prev_pc_context = pc_context();

  // a noop carries no RAS checkpoint
  ra_stack_ptr = 0;
  ra_stack_head = 0;
}


// Emits a bubble because the scheduled context is stalled on its rob/lsq:
// noop instruction, poison pc, tagged with the current fetch context.
inline void ifetch::insert_nop_on_stall()
{
  // tag the bubble with the context that could not fetch
  prev_pc_context = pc_context();

  // noop encoding and marker pc
  notdecoded_instr = 0;
  prev_program_counter = 0xdead2fec;
}

// Emits a bubble because fetch already went past a predicted branch in this
// cycle (hit_branch set): noop instruction, poison pc, tagged with the
// current fetch context.
inline void ifetch::insert_nop_on_hit_branch()
{
  // tag the bubble with the context that was scheduled
  prev_pc_context = pc_context();

  // noop encoding and marker pc
  notdecoded_instr = 0;
  prev_program_counter = 0xdead2fec;
}

// Predicts the pc that follows program_counter() for the scheduled context.
//
// The BTB is looked up with the current pc. On a miss, btb_miss_out is set
// and the fall-through address (pc + 4) is returned. On a hit, btb_miss_out
// is cleared, hit_branch is set to !aggressive_fetch, and the two low bits of
// the BTB entry select the kind of control transfer:
//   0x1 return        - pop the RAS of the context, predict the popped value
//   0x2 call          - push pc + 4 on the RAS, predict the BTB target
//   0x3 unconditional - predict the BTB target
//   0x0 conditional   - statically predicted taken: predict the BTB target
//
// is_return: out-parameter, set to true only when a return was predicted.
// Returns the predicted next pc.
//
// Side effects: may push/pop the RAS of pc_context().
inline uint32_t ifetch::perform_branch_prediction(bool * is_return)
{
  *is_return = false;

  // consult the branch target buffer
  uint32_t buffer_way = 0;
  if (!target_buffer.check(program_counter(), buffer_way)) {
    if(debug_mask & 0x0001) printf("IFETCH> btb miss\n");
    btb_miss_out = true;
    return program_counter() + 4;
  }

  btb_miss_out = false;
  hit_branch = !aggressive_fetch;

  const uint32_t btb_val    = target_buffer.get_value(program_counter(), buffer_way);
  // entry with the two type bits stripped
  const uint32_t btb_target = btb_val & ~0x3;

  switch (btb_val & 0x3) {
    case 0x1:
      // return: step the stack pointer back by one and read the target there
      next_ra_stack_ptr[pc_context()] = (next_ra_stack_ptr[pc_context()] + (ra_stack_size - 1)) % ra_stack_size;
      *is_return = true;
      return ra_stack[pc_context()][next_ra_stack_ptr[pc_context()]];

    case 0x2:
      // call: push the fall-through address, then advance the pointer
      ra_stack[pc_context()][next_ra_stack_ptr[pc_context()]] = program_counter() + 4;
      next_ra_stack_ptr[pc_context()] = (next_ra_stack_ptr[pc_context()] + 1) % ra_stack_size;
      return btb_target;

    case 0x3:
      // unconditional branch
      return btb_target;

    default: {
      // conditional branch.
      // NOTE: branches are always predicted to be taken; a direction
      // predictor would supply `predict_taken` here.
      const bool predict_taken = true;
      return predict_taken ? btb_target : program_counter() + 4;
    }
  }
}

// Loads the instruction at program_counter() from the memory of the
// scheduled context, places it on the output latch, bumps the fetch
// counters, and commits next_pc as both the context's new pc and the
// prediction that accompanies the instruction.
//
// is_return: in/out flag produced by perform_branch_prediction(); it is
//            forced to false when the fetched word is a noop.
//
// Expects next_pc to already hold the predicted next pc.
inline void ifetch::fetch_instruction(bool * is_return)
{
  // fetch the instruction word (a plain local: it is rewritten on every call,
  // so there is no reason for it to have static storage)
  const uint32_t fetched_instr = the_mem[pc_context()].load_uint32(program_counter());

  // send the instruction to the output
  notdecoded_instr = fetched_instr;
  prev_program_counter = program_counter();
  prev_pc_context = pc_context();
  ++num_fetched_instr;
  ++(*num_fetched)[pc_context()];

  // SMT (hack): Thread contention in the predictor creates a situation where
  //  a noop in one thread is mistakenly predicted as a taken branch. Because
  //  the noop never really executes, the misprediction is never detected by
  //  the back end of the pipeline. This hack ensures that noops are
  //  never predicted as branches.
  // NOTE(review): only next_pc and *is_return are corrected here; a RAS
  //  push/pop already performed by the predictor for this pc is not undone.
  if (fetched_instr == 0) {
    *is_return = false;
    next_pc = program_counter() + 4;
  }

  pc[pc_context()] = next_pc;
  prev_pc_prediction = next_pc;
}

// Emits a bubble because the instruction cache cannot deliver yet, and
// records in cache_cycle_ready, for the scheduled context, the cycle at
// which the line will be available.
inline void ifetch::send_nop_on_icache_miss(uint64_t cycle_ready)
{
  // remember when this context's line becomes ready
  cache_cycle_ready[pc_context()] = cycle_ready;

  // tag the bubble with the context that missed
  prev_pc_context = pc_context();

  // noop encoding and marker pc
  notdecoded_instr = 0;
  prev_program_counter = 0xdead2fec;
}


// Per-evaluation entry point of the fetch stage.
//
// Sequence:
//   1. housekeeping (retiring-branch hook, init_state, debug dump, stall stats)
//   2. recovery: redirect program_counter and repair pc[]/RAS if the back end
//      reported a simpanic, misspeculation, misprediction or syscall
//   3. return statistics
//   4. output selection:
//        - front-end wide stall   -> keep the output latch, squashing it only
//                                    if its context is being flushed
//        - scheduled ctx flushed  -> emit a noop
//        - ctx rob/lsq stall      -> emit a noop
//        - already hit a branch   -> emit a noop
//        - otherwise              -> access the icache; on a hit predict the
//                                    next pc and emit the instruction, on a
//                                    miss emit a noop
//   5. schedule the next context/pc (skipped on the wide-stall and flush paths)
void ifetch::recalc() {

  receive_retiring_branch();

  init_state();

  debug_ifetch(); //print debug information

  account_for_stalls(); //stat collection

    
  // Fix the pc of the instruction that was going to be fetched, if mispredicted:
  fix_current_pc_on_mispredict();

  // Fix the program counters and RAS on mispredict/misspeculation:
  fix_state_on_mispredict();

  returns_stat_collection();// stats

  // Check for stall conditions, and fetch the next instruction if able:
  // NOTE(review): this wide-stall test reads rob_stall() of context 0 only
  // (not pc_context()) and does not include lsq_stall(); the per-context
  // rob/lsq stalls are tested separately further down.
  if (scoreboard_stall() || rename_stall() || rob_stall()[0] || misspec_penalty) {
    // check for mispredictions during front-end stall
    handle_mispred_on_stall();
  }
  else {
    // no stall detected

    // check for misprediction on the context to be fetched
    if(flush_on_context(pc_context())) {
         // state: no stall detected, but misprediction detected in the thread
         //  scheduled to fetch during this cycle
         // action: insert a nop
      send_nop_on_mispredict();
    }
    else {
      // no misprediction/kill/stall on to-be-fetched context
      // check for rob stall of the context to be fetched
      if(rob_stall()[pc_context()] || lsq_stall()[pc_context()] ) {
        insert_nop_on_stall();
      }
      // fetch the instruction, if we havent gone past a branch yet this cycle
      // note that hit_branch() is set by perform_branch_prediction
      else if (hit_branch()) {
        insert_nop_on_hit_branch();
      } 
      else {
        // state: no stall and no misprediction detected 
        // action: fetch an instruction and put it on output latch

        // select the return address stack of the thread that fetches during this cycle
        // (checkpoint taken before prediction: pointer plus the entry just below it)
        ra_stack_ptr  = next_ra_stack_ptr[pc_context()];
        ra_stack_head = ra_stack[pc_context()][(next_ra_stack_ptr[pc_context()] + (ra_stack_size - 1)) % ra_stack_size];

        // check the icache for a hit, and stall until there is a hit
        uint64_t cycle_ready;
        uint32_t current_pc = program_counter();
        bool real_hit = access_icache(current_pc, &cycle_ready);
        // written by perform_branch_prediction()/fetch_instruction();
        // not read again in this function
        bool pc_isret;
        if (real_hit) {
          //hit in ICache!
          next_pc = perform_branch_prediction(&pc_isret);
          //put instruction on output latch
          fetch_instruction(&pc_isret);
        } 
        else {
         //cache miss, send noops
          send_nop_on_icache_miss(cycle_ready);
        } 
      } // end fetch instruction

      // schedule the next instruction to be fetched
      // NOTE(review): the next context is fixed at 0, so context 0 is
      // rescheduled on every path that reaches this point.
      uint32_t next_context = 0;
      program_counter = pc[next_context];
      pc_context = next_context;

    } // end test for misprediction
  } // end no stall detected

}

// Models an instruction fetch of address `x_pc` through the L1 icache and,
// on an L1 tag miss, the L2 cache.
//
// The caches store, per line, the cycle at which the line becomes usable.
//   - L1 tag hit: *cycle_ready is the cycle stored in L1.
//   - L1 miss, L2 hit: the line is ready iL1_delay (scaled by
//     superpipeline_factor) after the later of "now" and the L2 ready cycle.
//   - L1 miss, L2 miss: the line is ready L2_delay (scaled) from now and is
//     installed in L2 with that ready cycle.
//   In both miss cases the line is installed in L1 with the computed cycle
//   and the L2 hit/miss counters are updated.
//
// x_pc:        address to look up
// cycle_ready: out-parameter, cycle at which the line is (or will be) usable
// Returns true ("real hit") when the line is usable in the current cycle.
//
// Statistics: an access is counted when the L1 tag missed or the access was a
// real hit, so repeated probes of a line that is still in flight are not
// counted again; icache_hits counts real hits.
bool ifetch::access_icache(uint32_t x_pc, uint64_t * cycle_ready)
{
  *cycle_ready = 0;
  uint32_t way = 0;
  uint32_t l2way = 0;
  bool l2hit = false;

  const bool hit = icache.check(x_pc, way);
  if (hit) {
    //get the cycle ready, get_value updates lru
    (*cycle_ready) = icache.get_value(x_pc, way);
  } else {
    // look in L2
    l2hit = l2_cache->check(x_pc, l2way);
    if (l2hit) {
      uint64_t l2cycle_ready = l2_cache->get_value(x_pc, l2way);
      // a line that became ready in the past is available "now"
      if (l2cycle_ready <= cycle_count)
        l2cycle_ready = cycle_count;
      (*cycle_ready) = l2cycle_ready + iL1_delay * superpipeline_factor;
    } else {
      (*cycle_ready) = cycle_count + L2_delay * superpipeline_factor;
      //            l2_cache->conditional_update_value(x_pc, (*cycle_ready), num_contexts * superpipeline_factor);
      l2_cache->conditional_update_value(x_pc, *cycle_ready, superpipeline_factor);
    }

    // anything slower than a plain L1 refill from L2 counts as an L2 miss
    if ((*cycle_ready) > (cycle_count + iL1_delay * superpipeline_factor))
      il2_misses++;
    else
      il2_hits++;

    //          icache.conditional_update_value(x_pc, (*cycle_ready), num_contexts * superpipeline_factor);
    icache.conditional_update_value(x_pc, *cycle_ready, superpipeline_factor);
  }

  const bool real_hit = (*cycle_ready) <= cycle_count;

  if(debug_mask & 0x0001) {
    // report the address that was actually looked up; the 64-bit cycle is
    // cast so that the argument matches %llu on every platform
    printf("IFETCH> pc: %x L1_hit: %x L1_way: %u  L2_hit: %x L2_way: %u real_hit: %x cycle_ready: %llu\n",
           x_pc, hit, way, l2hit, l2way, real_hit,
           static_cast<unsigned long long>(*cycle_ready));
  }

  icache_accesses += (!hit || real_hit);
  icache_hits += real_hit; 

  return real_hit;
}
 
  
// Destructor: releases the array that ra_stack points to.
// NOTE(review): only this one array is deleted. If the per-context rows of
// ra_stack were allocated separately, they are not freed here — confirm
// against the allocation site, which is not part of this file section.
ifetch::~ifetch() {
  delete [] ra_stack;
}

// Hook for branches retiring this cycle; invoked at the very start of
// recalc(). Intentionally a no-op: the fetch stage keeps no state that is
// updated at branch retirement.
void ifetch::receive_retiring_branch() {
  //We currently do nothing for retiring branches in ifetch !!
}
