#include "scheduler.h"

// static member definitions:
// Per-context table of cancel ranges, indexed by context id.  Each entry maps
// a range's end timestamp (exclusive) to its start timestamp (inclusive).
// The vector is constructed empty here.
// NOTE(review): presumably resized to the number of contexts elsewhere
// before it is first indexed - confirm.
std::vector<scheduler::cancel_range_t> scheduler::cancel_ranges(0);

/*static*/
inline bool scheduler::inside_cancel_range(const uint32_t x_ctxt, const uint64_t x_ts) /*const*/ {
  // find the first range end point that is greater-than x_ts
  cancel_range_t::const_iterator range_end = cancel_ranges[x_ctxt].upper_bound(x_ts);
  if(range_end != cancel_ranges[x_ctxt].end()) {
    if(debug_mask & 0x0008) printf("scheduler::inside_cancel_range ctxt: %u ts: %llu range: (%llu %llu)\n",
                                   x_ctxt, x_ts, range_end->second, range_end->first);
    return (range_end->second <= x_ts);
  }
  else {
    return false;
  }
}

// Decides whether an instruction may be issued right now.
// Non-syscall instructions are always safe.  A syscall is safe only when its
// reorder_slot equals the rob_head() entry for its own context.
inline bool scheduler::safe_instr(const decoder::instr_h& instr) const {
  if (!instr->is_syscall) {
    return true;
  }
  return (rob_head()[instr->context] == instr->reorder_slot);
}

// Returns true when both physical source registers (s and t) of the
// instruction are marked available in reg_avail, i.e. no true dependence is
// outstanding.
inline bool scheduler::registers_ready(const decoder::instr_h& the_instr) const {
  if (!reg_avail[the_instr->phys_s_reg]) {
    return false;  // s operand still pending
  }
  return reg_avail[the_instr->phys_t_reg];
}

inline bool scheduler::ready_to_issue(const size_t slot) {
  decoder::instr_h the_instr = issue_buffer.get(slot).instr;
  return safe_instr(the_instr);
}

inline void scheduler::set_cancel_ts(const uint32_t x_ctxt, const uint64_t x_start_ts, const uint64_t x_end_ts) {
  // adjust the cancel ts ranges based on this cancelation:
  if(debug_mask & 0x0008) {
    printf("SCHD> set_cancel_ts: ");
    printf("ctxt: %u start_ts: %llu end_ts: %llu \n", x_ctxt, x_start_ts, x_end_ts);
  }

  // determine if this cancelation is within another cancelation range:
  // note:  the lower bound is inclusive.
  // note:  the upper bound is not inclusive, thus if X is the upper bound, it is 
  // possible that instruction num X is a valid instruction

  // determine the end of the new range to be created
  uint64_t new_end;
  cancel_range_t::iterator range_end = cancel_ranges[x_ctxt].upper_bound(x_end_ts);

  if(range_end != cancel_ranges[x_ctxt].end()) {
    if(x_end_ts < range_end->second) {
      // the new range ends before the start of the this range,
      // thus they are completely separate
      new_end = x_end_ts;
    }
    else {
      // the ranges overlap, thus the new range ends at the upperbound
      new_end = range_end->first;
    }
  }
  else {
    // the end of this range is after every other range's end
    new_end = x_end_ts;
  }

  // determine the start of the new range to be created
  uint64_t new_start;
  cancel_range_t::iterator range_start = cancel_ranges[x_ctxt].upper_bound(x_start_ts);
  
  if(range_start != cancel_ranges[x_ctxt].end()) {
    if(x_start_ts < range_start->second) {
      // the new range starts before this range
      new_start = x_start_ts;
    }
    else {
      // the ranges overlap
      new_start = range_start->second;
    }
  }
  else {
    // the start of this range is after every other range's end
    new_start = x_start_ts;
  }

  // remove overlapping ranges
  cancel_range_t::iterator itr = cancel_ranges[x_ctxt].upper_bound(new_start);
  while((itr != cancel_ranges[x_ctxt].end()) && (itr->first < new_end)) {
    cancel_ranges[x_ctxt].erase(itr);
    itr = cancel_ranges[x_ctxt].upper_bound(new_start);
  }

  // insert new range
  cancel_ranges[x_ctxt][new_end] = new_start;

  if(debug_mask & 0x0008) {
    printf("Cancel Ranges: \n");
    for(cancel_range_t::const_iterator itr = cancel_ranges[x_ctxt].begin();
        itr != cancel_ranges[x_ctxt].end();
        ++itr) {
      printf("(%llu:%llu)\n", itr->second, itr->first);
    }
  }
}

// Reacts to a misspeculation-type event signalled this cycle (simpanic,
// memory misspeculation, branch mispredict or syscall): records a cancel
// range for the affected context, flushes canceled instructions from the
// dependency window and cancels matching entries of the issue buffer.
// Does nothing when none of the four events is signalled.
inline void scheduler::handle_misspeculation() {
  const bool squash_requested = simpanic_in().valid ||
                                branch_exec().mispredict ||
                                mem_access_exec().misspeculation ||
                                syscall_exec().valid;
  if (!squash_requested) {
    return;
  }

  uint32_t cancel_context  = 0;
  uint64_t cancel_ts       = 0ULL;

  // Pick the event that defines the cancel point.
  // simpanics are generated by retiring instructions in the ROB, and should
  // be given priority over other mispredictions
  if(simpanic_in().valid) {
    cancel_context = simpanic_in().context;
    cancel_ts      = simpanic_in().instr_num + 1;
  }
  else if (mem_access_exec().misspeculation) {
    cancel_context = mem_access_exec().recovery_context;
    cancel_ts      = mem_access_exec().recovery_ts;
  }
  else if(branch_exec().mispredict) {
    cancel_context  = branch_exec().context;
    cancel_ts       = branch_exec().instr_num + 1;
  }
  else {
    cancel_context = syscall_exec().context;
    cancel_ts      = syscall_exec().instr_num + 1;
  }

  // everything from cancel_ts up to the context's current decode instruction
  // number is canceled
  set_cancel_ts(cancel_context, cancel_ts, decode_instr_num()[cancel_context]);

  // flush the dependency window
  typedef vector<pair<uint32_t, uint64_t> > reg_tag_t;
  reg_tag_t reg_tags;
  dependency_window.flush_canceled_instrs(&inside_cancel_range, reg_tags, instr_dep_count);
  // reg_tags is handed to flush_canceled_instrs() but is currently unused
  // afterwards; this is the place to clean up canceled tags if ever needed.

  // flush the issue buffer

  // clean out the issue buffer of canceled instructions which have issued:
  // this is needed since issuing does not remove them from the buffer,
  // which is kept for replay - not implemented - but we'll want it later
  size_t slot = issue_buffer.get_head();
  while(slot != issue_buffer.get_end()) {
    // Read the successor BEFORE a possible cancel: cancel_slot() ends in
    // issue_buffer.remove(slot), after which asking for the successor of the
    // removed slot is not reliable.
    const size_t next_slot = issue_buffer.get_next(slot);

    decoder::instr_h instr = issue_buffer.get(slot).instr;
    if((cancel_context == instr->context) &&
       inside_cancel_range(cancel_context, instr->instr_num)) {
      cancel_slot(slot);
    }

    slot = next_slot;
  }
}

inline void scheduler::handle_writeback() {
  // mark completed instruction
  uint32_t writeback_phys_reg = 0;
  if (writeback_bus().reorder_slot != INV_POOL_PTR) {
    size_t slot = writeback_bus().sb_slot;
    decoder::instr_h the_instr = issue_buffer.get(slot).instr;
    writeback_phys_reg = the_instr->phys_dest_reg;
    //    assert(writeback_bus().reg_tag == writeback_phys_reg);
    reg_avail[writeback_phys_reg] = true;
    if(debug_mask & 0x0008) printf("SCHD> writeback: ");
    register_ready_insert(make_pair(writeback_phys_reg, the_instr->instr_num));
    complete_slot(slot);
  }

  if (branch_exec().is_branch) {
    size_t slot = branch_exec().sb_slot;

    // jals and jalrs get completed by writeback_bus, not branch_exec
    if (issue_buffer.get(slot).instr->phys_dest_reg == 0) {
      if(debug_mask & 0x0008) printf("SCHD> branch bus: ");
      complete_slot(slot);
    }
  }
      
  // mark stores
  if (store_bus().reorder_slot != INV_POOL_PTR) {
    size_t slot = store_bus().sb_slot;
    if(debug_mask & 0x0008) printf("SCHD> store bus: ");
    complete_slot(slot);
  }

  // mark syscalls
  if (syscall_exec().valid) {
    size_t slot = syscall_exec().sb_slot;
    if(debug_mask & 0x0008) printf("SCHD> syscall bus: ");
    complete_slot(slot);
  }
}

// Places x_instr into the issue buffer, records the slot it received in the
// instruction itself and refreshes the "zero outstanding dependencies"
// counter from the buffer occupancy.
inline void scheduler::issue_buffer_insert(decoder::instr_h x_instr) {
  // only works since instr is a gcp: the assignment effects the inserted
  // object, too
  x_instr->sb_slot = issue_buffer.insert(issue_buffer_object(x_instr));
  instr_dep_count[0] = issue_buffer.num_used();

  if(!(debug_mask & 0x0008)) {
    return;
  }
  x_instr->print("SCHD> issue_buffer_insert:");
  printf(" sb_slot: %u\n", x_instr->sb_slot);
}

// Appends a (register tag, timestamp) entry to the back of the queue of
// ready registers that wakeup() drains.
inline void scheduler::register_ready_insert(const ready_reg_t& x_reg)
{
  ready_registers.push_back(x_reg);
}

// wakeup()
// Drains the queue of ready registers.  For each entry it asks the dependency
// window to consume one dependent of that register; an instruction that
// becomes fully ready is moved into the issue buffer.  While the register's
// dependency chain is not exhausted the entry is re-queued (at the front or
// at the back, depending on schd_reg_dep_first).  Wakeup statistics and the
// per-dependency-count occupancy counters are updated along the way.
void scheduler::wakeup() {
  // NOTE(review): this dump is enabled by ANY debug bit, while the other
  // scheduler traces test bit 0x0008 - confirm that this is intended.
  if(debug_mask) {
    printf("SCHD::WAKEUP::READYREGS:  ");
    for(reg_list_t::iterator r = ready_registers.begin(); r != ready_registers.end(); ++r) {
      // register tags are unsigned, so print them with %u
      printf("reg %u\t", r->first);
    }
    printf("\n");
  }

  // the per-cycle success counter restarts on superpipeline boundaries
  if((cycle_count % superpipeline_factor) == 0) {
    cycle_wakeup_success = 0;
  }

  while(!ready_registers.empty()) {
    pair<uint32_t, uint64_t> ready_reg = ready_registers.front();
    ready_registers.pop_front();
    uint64_t reg_ts = ready_reg.second;
    uint32_t reg_tag = ready_reg.first;

    // .consume() return values
    decoder::instr_h instr;
    bool chain_end = true;
    uint32_t num_deps = 0;

    if(debug_mask & 0x0008) printf("SCHD> wakeup reg %u\n", reg_tag);

    wakeup_result_e wakeup_result = dependency_window.consume(reg_tag, reg_ts, instr, chain_end, num_deps);
    ++wakeup_attempts_total;

    switch(wakeup_result) {
    case WAKEUP_SUCCESS:
      // instr has no outstanding dependencies left: move it from the
      // one-dependency bucket into the issue buffer
      if(debug_mask & 0x0008) {
        instr->print("SCHD> wakeup - instr:");
        printf("\n");
      }
      issue_buffer_insert(instr);
      assert(instr_dep_count[1] > 0);
      --instr_dep_count[1];
      assert(num_deps == 0);
      ++wakeup_attempts_success;
      ++cycle_wakeup_success;
      break;
    case WAKEUP_INSTR_NOT_READY:
      ++wakeup_attempts_instr_not_ready;
      break;
    case WAKEUP_EMPTY_CHAIN:
      ++wakeup_attempts_wasted;
      break;
    default:
      // pass the enum as an explicit unsigned so it matches %u
      printf("%u\n", static_cast<unsigned>(wakeup_result));
      assert(0&&"Unhandled Wakeup Result!");
    }

    // an instruction lost one dependency but still has num_deps left: move
    // it down one occupancy bucket
    if(num_deps) {
      assert(instr_dep_count[num_deps + 1] > 0);
      ++instr_dep_count[num_deps];
      --instr_dep_count[num_deps + 1];
    }

    if(!chain_end) {
      if(debug_mask & 0x0008) printf("SCHD> rescheduling reg %u\n", reg_tag);
      // instrs left to wake up on this reg
      if(schd_reg_dep_first) {
        ready_registers.push_front(ready_reg);
      }
      else {
        ready_registers.push_back(ready_reg);
      }
    }
  }
}

// Issues the instruction stored in issue-buffer slot `slot`: drives it onto
// instr_out and flags the slot as issued.  Instructions that write register 0
// and are not a branch, jump, store, load or syscall are completed on the
// spot.
inline void scheduler::issue_instr(const size_t slot) {
  decoder::instr_h the_instr = issue_buffer.get(slot).instr;

  instr_out = the_instr;
  issue_buffer.get(slot).issued = true;

  // no-ops should be marked complete on issue (they will never writeback):
  // this code is required because instructions like:
  // "mulhu   $00 $00 $00" are not decoded as no-ops.
  const bool ctrl_mem_or_syscall = the_instr->is_branch || the_instr->is_jump ||
                                   the_instr->is_store  || the_instr->is_load ||
                                   the_instr->is_syscall;
  if((the_instr->phys_dest_reg == 0) && !ctrl_mem_or_syscall) {
    complete_slot(slot);
  }
}

// Hook called by cancel_slot() for each canceled instruction.  The body is
// intentionally empty at present; x_instr is unused.
inline void scheduler::cancel_instr(const decoder::instr_h& x_instr) {
  // function used to clean up register tags (if needed)
}

// Cancels the instruction held in issue-buffer slot `slot` and releases the
// slot.
inline void scheduler::cancel_slot(const size_t slot) {
  // complete_slot() asserts that the slot has been issued, so a canceled
  // instruction is flagged as issued first even if it never actually issued
  issue_buffer.get(slot).issued = true;
  cancel_instr(issue_buffer.get(slot).instr);
  // marks the slot completed and removes it from the issue buffer
  complete_slot(slot);
}

// Marks the instruction in issue-buffer slot `slot` as completed, removes the
// slot from the issue buffer and refreshes the "zero outstanding
// dependencies" counter from the buffer occupancy.
//
// Preconditions (asserted): the slot does not hold decoder::noop, it has
// been issued, and it has not been completed before.
inline void scheduler::complete_slot(const size_t slot) {
  const decoder::instr_h& the_instr = issue_buffer.get(slot).instr;
  assert(the_instr != decoder::noop);

  if(debug_mask & 0x0008) {
    // size_t does not match %u on LP64 targets; print through unsigned long
    printf("SCHD> completing instr in slot %lu\n", static_cast<unsigned long>(slot));
  }

  assert(issue_buffer.get(slot).issued == true);
  assert(issue_buffer.get(slot).completed == false);

  issue_buffer.get(slot).completed = true;
  issue_buffer.remove(slot);
  instr_dep_count[0] = issue_buffer.num_used();
}

inline void scheduler::issue() {
  // find an instruction to issue, if possible
  instr_out      = decoder::noop;
 
  // iterate over the list
  size_t slot = issue_buffer.get_head();
  while(slot != issue_buffer.get_end()) {
    if(!issue_buffer.get(slot).issued && ready_to_issue(slot)) {
      decoder::instr_h issuing_instr = issue_buffer.get(slot).instr;

      if(inside_cancel_range(issuing_instr->context, issuing_instr->instr_num)) {
        if (debug_mask & 0x00000008) {
          printf("SCHD> CANCELED INSTR: ");
        }

        cancel_slot(slot);
      }
      else {
        if (debug_mask & 0x00000008) {
          printf("SCHD> ISSUING INSTR: ");
        }
        
        issue_instr(slot);
      }

      if(debug_mask & 0x0008) {
        issuing_instr->print("SCHD> IB ");
        printf("\n");
      }
      break;
    }
    slot = issue_buffer.get_next(slot);
  }
}

inline void scheduler::dispatch() {

  // if rename gave us an instruction we better remember it
  if(!instr_in()->noop) {
    bool canceled = (simpanic_in().mispredicted_context(instr_in()->context) ||
                     branch_exec().mispredicted_context(instr_in()->context) ||
                     mem_access_exec().mispredicted_context(instr_in()->context) ||
                     (syscall_exec().valid && (instr_in()->context == syscall_exec().context)));

    if(!canceled) {
      decoder::instr_h mod_instr = instr_in();

      if(registers_ready(mod_instr)) {
        if(debug_mask & 0x0008) printf("SCHD DISPATCH> renamed instr ready\n");
        ++dispatch_dep_count[0];
        issue_buffer_insert(mod_instr);
      }
      else {
        bool s_dep = reg_avail[mod_instr->phys_s_reg];
        bool t_dep = reg_avail[mod_instr->phys_t_reg];
        bool s_t_unequal = (mod_instr->phys_s_reg != mod_instr->phys_t_reg);
        uint32_t deps = !s_dep + (!t_dep & s_t_unequal);
        ++dispatch_dep_count[deps];

        // insert instruction
        if(dependency_window.insert(mod_instr, s_dep, t_dep)) {
          if(debug_mask & 0x0008) printf("SCHD DISPATCH> renamed instr in dep window\n");
          ++instr_dep_count[deps];
        }
        else {
          assert(0 && "SCHD DISPATCH> Dependency window insertion failure!");
        }
      }

      reg_avail[mod_instr->phys_dest_reg] = false;
      reg_avail[0] = true;
    }

    if (instr_in()->is_store) {
      instr_in()->stq_slot = the_exec_unit->the_stq.allocate(instr_in()->context, instr_in()->instr_num);
    }
    else if (instr_in()->is_load) {
      instr_in()->ldq_slot = the_exec_unit->the_ldq.allocate(instr_in()->context, instr_in()->instr_num);
    }
  }
}

// Flags, in ret_phys_regs, every physical register that currently sits in
// the ready-register queue: ret_phys_regs[tag] is set to 1 for each queued
// tag below num_phys_regs.  Other elements are left untouched.
void scheduler::account_phys_regs(vector<uint32_t>& ret_phys_regs) const {
  reg_list_t::const_iterator reg = ready_registers.begin();
  while(reg != ready_registers.end()) {
    // ignore tags outside the physical register file
    if(reg->first < num_phys_regs) {
      ret_phys_regs[reg->first] = 1;
    }
    ++reg;
  }
}

void scheduler::handle_retirement() {
  // clean up cancel_ranges
  if (retiring_instr()->completed && 
      (retiring_instr()->is_branch || retiring_instr()->is_jump)) {
    uint64_t cancel_ts = retiring_instr()->instr_num;
    uint32_t c = retiring_instr()->context;

    while(!cancel_ranges[c].empty() && (cancel_ranges[c].begin()->first < cancel_ts)) {
      cancel_ranges[c].erase(cancel_ranges[c].begin());
    }
  }
}

// Computes scoreboard_stall for this cycle: the front end is stalled when the
// combined issue-buffer + dependency-window occupancy is too close to
// issue_buffer_size, or when the store queue or load queue asks for a stall.
// The LSQ stall counters are only bumped when both conditions hold at once.
void scheduler::set_stall_conditions() {
  if(debug_mask & 0x0008) printf("issue buffer size: %u dep window size: %u\n", issue_buffer.num_used(), dependency_window.num_used());

  // there are possibly two instrs inserted into the combined structures per
  // cycle (this is done to simulate an "ideal" scheduler)
  // NOTE(review): the headroom used below is 3, not 2 - confirm.
  const bool window_full =
    ((issue_buffer.num_used() + dependency_window.num_used() + 3) > issue_buffer_size);
  const bool lsq_full =
    (the_exec_unit->the_stq.stall_stores() || the_exec_unit->the_ldq.stall_loads());

  if (lsq_full && window_full) {
    // attribute the stall to the store queue first, else to the load queue
    if (the_exec_unit->the_stq.stall_stores()) {
      lsq_store_stall_cnt++;
    }
    else if (the_exec_unit->the_ldq.stall_loads()) {
      lsq_load_stall_cnt++;
    }
  }

  scoreboard_stall = window_full || lsq_full;
}

// Per-cycle bookkeeping and self-checks.
//   - asserts that the occupancy counters agree with the real occupancy of
//     the dependency window (buckets 1..3) and the issue buffer (bucket 0);
//   - accumulates the combined scheduler occupancy into scheduler_size_sum;
//   - on bullet-proof check cycles, recounts the dependency window and
//     terminates the process with exit code 1 if any bucket disagrees.
void scheduler::handle_statistics() {
  assert(dependency_window.num_used() == (instr_dep_count[1] +
                                          instr_dep_count[2] +
                                          instr_dep_count[3]));
  assert(issue_buffer.num_used() == instr_dep_count[0]);

  scheduler_size_sum += (issue_buffer.num_used() +
                         dependency_window.num_used());

  if(bullet_proof_check_cycle) {
    vector<uint32_t> dblchk = dependency_window.count_dependencies();

    // Report EVERY mismatching bucket before giving up; the flag lives
    // outside the loop so that later buckets are still checked and printed.
    bool insane = false;
    // unsigned index so that it matches the %u conversion below
    for(uint32_t i = 1; i < 4; ++i) {
      if(dblchk[i] != instr_dep_count[i]) {
        printf("%u d %u r %u\n", i, dblchk[i], instr_dep_count[i]);
        insane = true;
      }
    }
    if(insane) exit(1);
  }
}

// Runs one scheduler evaluation: statistics/self-checks first, then the
// scheduler phases in a fixed order.  Any std::exception thrown by a phase
// is caught here, its message is written to cout, and the remaining phases
// of this call are skipped.
void scheduler::recalc() {
  if(debug_mask & 0x0008) printf("SCHD>  recalc:\n");
  
  // runs outside the try block below
  handle_statistics();

  try {
    // wb expects slots/entries to exist for all wb events
    // should be done before handling misspeculation
    handle_writeback(); 
    // record cancel ranges and flush canceled instructions
    handle_misspeculation();
    // release dependents of registers that became ready
    wakeup();
    // accept the instruction arriving on instr_in()
    dispatch();
    // pick at most one instruction to issue
    issue();
    // discard cancel ranges that retirement has passed
    handle_retirement();
    // compute scoreboard_stall
    set_stall_conditions();
  }
  catch(const exception& e) {
    // the message is printed without a trailing newline
    cout << e.what();
  }
}

