#ifndef ROB_H
#define ROB_H

#include "circuit.h"
#include "regfile_circuit.h"
#include "decode.h"
#include "packets.h"
#include "arch-model.h"
#include "sparse_memory.h"
#include "pretty_print.h"
#include "globals.h"
#include "pool_alloc.h"

using namespace std;
// SMT:
// 1. Added multiple arch-models to verify that each thread retires
//    its instructions correctly and in order.
// 2. On a misprediction/misspeculation, the dispatched instruction is marked
//    as completed and canceled only if it belongs to the thread that retired
//    the misprediction/misspeculation on the previous clock cycle.
// 3. On the cycle after a mispredicted branch or misspeculated load/store retires,
//    cancel only instructions that belong to the thread that retired the misprediction.
// 4. At present the arch-model actually updates the memory model when a store
//    instruction retires. We should probably let the pipelined-model update the memory.

// Alpha 21264 Style Renaming
// 1. Correct info on any load/store must be passed back to the exec unit, even
//    if the load/store is canceled. A load/store might complete, then be
//    canceled when a misprediction/misspeculation is detected in an instruction
//    that was fetched before the load/store. Thus, the completed load/store will
//    be canceled. In that case, the only way to retire the load/store from the
//    bypass/timestamp caches is to pass the relevant info back to the exec
//    unit when the (canceled) load/store retires.
// 2. Branch feedback from the rob is no longer used, so the branch buf
//    can probably be ripped out.

// Global variables potentially defined on the command line.
// All of these are declared here and defined in some other translation unit.
// NOTE(review): the per-variable meanings below are inferred from the names
// only -- confirm against the definitions before relying on them.
extern uint32_t num_phys_regs;            // presumably the physical register count
extern uint32_t rob_size;                 // presumably the reorder-buffer capacity (units not visible here)
extern uint32_t num_contexts;             // presumably the number of SMT hardware contexts
extern bool alpha_renaming;               // presumably selects the "Alpha 21264 Style Renaming" described above
extern bool use_oracle_bpred;             // presumably enables oracle branch prediction (see rob::oracular_bpred)
extern uint32_t frontend_delay_size;
extern uint64_t cycles_waiting_to_retire;
extern bool skip_retire_check_failures;   // presumably lets simulation continue past retire-check failures
extern uint32_t num_retire_check_failures;


// Global debug variables
// Bits of debug_mask that are relevant to this stage:
// rob stage                                      : debug_mask is 0x00000010
// rob destination regs                           : debug_mask is 0x00000400
// load addresses and data                        : debug_mask is 0x00001000
// arch dest reg and data for each retiring instr : debug_mask is 0x00002000
extern uint32_t debug_mask;
extern uint32_t tornado_warning;
extern uint64_t cycle_count;
extern FILE* trace_file;

// Flow <-> context mapping tables (raw pointers; ownership and array lengths
// are not visible in this header).
// NOTE(review): presumably flow_to_ctext_map/flow_valid are indexed by flow and
// ctext_to_flow_map/ctext_valid by context -- TODO confirm.
extern uint32_t  num_active_flows;
extern uint32_t* flow_to_ctext_map;
extern bool*     flow_valid;
extern uint32_t* ctext_to_flow_map;
extern bool*     ctext_valid;

// the rob counts retired instructions
// (global statistic counters; defined elsewhere)
extern uint64_t the_instr_count;
extern uint64_t the_retired_branch_count;
extern uint64_t the_mispredicted_branch_count;
extern uint64_t the_mispredicted_non_branch_count;
extern uint64_t the_retired_load_count;
extern uint64_t the_retired_store_count;
extern uint64_t the_retired_misspec_load_count;
extern uint64_t the_retired_misspec_store_count;

//stat collection
extern uint64_t spec_instrs_retired;


class rob_entry {
 public:
  decoder::instr_h instr;
  branch_packet br_packet;
  mem_access_packet mem_packet;
  rob_entry() { instr = decoder::noop;}  //default constructor. 
  rob_entry(decoder::instr_h x_instr, branch_packet x_br_packet, mem_access_packet x_mem_packet):
    instr(x_instr), br_packet(x_br_packet), mem_packet(x_mem_packet) {}
};

// Reorder buffer (rob) stage of the pipelined model.
//
// Derives from circuit; because no access specifier is given on a `class`,
// the inheritance is private. Everything declared before `public:` below is
// likewise private. Only declarations live in this header -- all member
// function bodies are defined elsewhere, so the per-function notes below are
// limited to what the names, signatures and existing comments establish.
//
// Ports: instructions arrive from rename on instr_in, execution results
// arrive on the exec_unit inports, and results are exposed through the
// statereg outputs at the bottom of the class.
class rob : circuit
{
  // pool allocated fifo of already fetched/decoded/renamed instructions:
  pool_alloc<rob_entry> reorder_buf;

  // NOTE(review): presumably one flag per context controlling load-stat
  // counting (see update_count_load_stats) -- TODO confirm indexing.
  vector<bool>                      count_load_stat;

  bool cancelled_instr_rec;

  // NOTE(review): these look like "empty" sentinel packets used when an
  // instruction has no branch / memory access -- confirm in the .cpp file.
  branch_packet no_branch;
  mem_access_packet no_mem_access;

  decoder::instr_h retiring_instr;

  //branch_exec stats
  // (nspec_* = non-speculative counterparts of the speculative counters below,
  //  judging by the naming -- TODO confirm)
  uint32_t nspec_miss_cond;   
  uint32_t nspec_miss_return; 
  uint32_t nspec_miss_call;  
  uint32_t nspec_branch;     
  //speculative              
  uint32_t spec_miss_cond;   
  uint32_t spec_miss_return; 
  uint32_t spec_miss_call;  
  uint32_t spec_branch;

  // debug info
  uint32_t idle_cycles;
  
  // debug arch-model
  // Pointer to an array of arch_model pointers, received through the
  // constructor argument. Ownership is not visible in this header.
  // NOTE(review): presumably one arch_model per thread/context, per the SMT
  // notes at the top of the file -- TODO confirm.
  arch_model** the_arch_model;

  // ---- private helpers (definitions not in this header) ----
  bool fifo_empty(uint32_t c);
  void oracular_bpred(decoder::instr_h the_instr);
  void count_executed_instr(uint32_t context, bool canceled);
  void print_debug_info();
  void initialize_state(); //at the beginning of every cycle, set state correctly
  void process_input_instr();  //take the instruction from renamer, and put it into the ROB
  bool is_context_flush(uint32_t context);  //returns true if misspec/mispred on a particular context
  bool is_context_flush();                  //returns true if misspec/mispred on ANY context

  // NOTE(review): the generate_* helpers presumably compute the rob_head,
  // rob_tail and rob_head_timestamp outputs declared below -- TODO confirm.
  void generate_rob_head_and_tail();
  void generate_rob_head_ts();
  // NOTE(review): the process_* helpers are named after the exec_unit inports
  // below (mem_access_exec, syscall_exec, store_bus, branch_exec,
  // writeback_bus) and presumably consume the corresponding port -- confirm.
  void process_mem_exec();
  void process_syscall_exec();
  void process_store_bus();
  void process_branch_exec();
  void process_wb_bus();
  void flush_context(uint32_t context, uint64_t cancel_ts);
  // Takes two pointer parameters that, judging by their names, are
  // out-parameters for the misspeculating context and the instruction number
  // to cancel from; the meaning of the bool result is not visible here.
  bool investigate_misspec(uint32_t* misspec_context, uint64_t* cancel_instr_num);
  // Retirement path helpers. retiring_slot / the_entry point at a rob_entry
  // and are taken as pointer-to-const, so these helpers cannot modify the
  // entry through that pointer.
  void retire_canceled_instr(uint32_t context);
  void do_arch_compare(const decoder::instr_h instr_retiring, uint32_t retiring_context);
  void train_reconvergence_predictor(const rob_entry* retiring_slot);
  void update_cycles_waiting(const rob_entry* retiring_slot);
  void update_count_load_stats(const rob_entry* retiring_slot, bool found_retire_context, uint32_t retiring_context);
  void process_tornado_warning(bool found_retire_context, uint32_t retiring_context);
  void send_retirement_signals(const rob_entry* retiring_slot);
  void update_branch_predictors_on_instr_retirement(const rob_entry* retiring_slot);
  void retire_valid_instr(uint32_t context);
  void collect_retiring_instr_stats(const rob_entry* the_entry);
  void print_retiring_context_debug_info();
  // NOTE(review): presumably writes the chosen context to *retire_this and
  // returns whether one was found -- TODO confirm.
  bool schedule_retiring_context(uint32_t *retire_this);


  void clear_state_on_simpanic();
  void update_branch_exec_stats();
  // NOTE(review): presumably the per-cycle evaluation hook expected by the
  // circuit base class; circuit is not declared in this header -- confirm.
  void recalc();

public:
  bool stall_prev_stages(uint32_t c);
  // Takes the vector by non-const reference, so the callee may modify it.
  void account_phys_regs(vector<uint32_t>& phys_regs);
  // Constructor taking the arch-model array (see the_arch_model above).
  // Not declared explicit, so it also acts as an implicit conversion from
  // arch_model**.
  rob(arch_model ** am);

  // input from rename:
  inport<decoder::instr_h> instr_in;

  // inputs from exec_unit:
  inport<branch_packet>     branch_exec;     // execution unit results for branches
  inport<store_bus_packet>  store_bus;      // execution unit results for stores
  inport<mem_access_packet> mem_access_exec; // execution unit results for loads/stores
  inport<bus_packet>        writeback_bus;  // execution unit results for everything else
  inport<syscall_exec_packet> syscall_exec;  //ex unit result for syscall execution

  // outputs:
  // NOTE(review): the vector-valued outputs are presumably indexed by
  // context -- TODO confirm.
  statereg<vector<uint32_t> >   rob_head;   //DELTA: needed by the scheduler. Probably to find if syscalls are safe.
  statereg<vector<uint32_t> >   rob_tail;   //DELTA: needed by the scheduler. Needed because the scheduler is the one assigning rob_slot to instructions!!
  statereg<vector<uint8_t> >  rob_stall;
  statereg<decoder::instr_h>  retiring_instr_out;
  statereg<mem_access_packet> mem_access_out;
  statereg<branch_packet>     branch_out;
  statereg<vector<uint64_t> > rob_head_timestamp;  //DELTA.
  statereg<uint32_t> rob_nonspec_final_pc;
  statereg<simpanic_packet> simpanic_out;  //simpanics: arch-model mismatches on retire
  statereg<branch_packet>     branch_ret;
  statereg<uint8_t> instruction_retiring;
  statereg<uint32_t> retiring_instr_context;
  statereg<vector<uint64_t> > instr_executed;
  statereg<vector<uint64_t> > instr_executed_correctly;

};

#endif /* ROB_H */
