#ifndef LSQ_PF_H_GUARD
#define LSQ_PF_H_GUARD

#include <vector>
#include <map>
#include <assert.h>
#include "globals.h"
#include "pool_alloc.h"
#include "lsq_packets.h"
#include "sparse_memory.h"

// Globals shared with the rest of the simulator; only declared here, the
// definitions live elsewhere.
// NOTE(review): judging by the names, num_contexts is the number of hardware
// contexts and frontend_delay_size the size of the front-end delay stage --
// confirm against their definitions.
extern uint32_t  num_contexts;
extern uint32_t  frontend_delay_size;

// Bit mask selecting which debug traces are printed:
// show load addresses and data  : debug_mask is 0x1000
// show store addresses and data : debug_mask is 0x0800
extern uint32_t debug_mask;

// The arb is a 2D data structure.
// The first dimension is indexed by address. Each address-indexed slot
//  contains pointers to all the in-flight, completed loads (stores)
//  to that address.
// The second dimension is indexed by timestamp. That is, the timestamp
//  specifies a specific load (store) among the set of in-flight loads
//  and stores to a given address.
// The arb address mask is used to mask off low-order address bits in
//  the tags of the arb's entries, so that all loads (stores) to a
//  given aligned double-word are binned in the same arb entry.
// The containers are spelled std::map explicitly so that this header does not
// depend on a using-directive leaking in from another include.

// map from timestamp to ldq (stq) slot; std::map keeps the timestamps in
// ascending order, so iteration visits the entries by increasing timestamp
typedef std::map<uint64_t, size_t> arb_entry;

// map from address to arb_entry
typedef std::map<uint32_t, arb_entry> arb;

// Mask that clears the three low-order address bits (8-byte granularity).
// The literal is a signed int with value -8; ANDing it with a uint32_t
// address converts it to 0xFFFFFFF8.
#define ARB_ADDR_MASK (~0x7)

// Store queue (stq).
// Keeps stq_packet entries in a pool_alloc and indexes them through an arb
// (address -> timestamp -> slot). It also carries the scratch state used
// while bypassing data from the stq's entries to a load.
// NOTE(review): all member functions are defined out of line; any behavioral
// remark below that goes beyond the declaration itself is an assumption to be
// confirmed against the implementation.
class stq {
private:
  // stq data structure
  arb the_arb; // address/timestamp index (see the arb typedef)
  pool_alloc<stq_packet> the_stq; // storage for the store packets
  uint32_t stq_size; // presumably total slots in stq (cf. ldq::ldq_size) -- confirm
  uint32_t st_cnt; // presumably total stores in stq (cf. ldq::ld_cnt) -- confirm

  // working set for bypassing from stq's entries to a load
  bool     bypassed;
  uint64_t working_data;
  uint32_t working_vaddr;
  uint64_t working_inum;
  uint8_t  working_fwd_mask;

public:
  // size: presumably the number of slots in the queue -- confirm
  stq(uint32_t size);

  void print(); // print all contexts
  void printCtxt(uint32_t ctxt); // print one context
  void printAddr(uint32_t addr); // print all stores to specified address (all contexts)
  void printARB();
  uint32_t get_num_stores(uint32_t ctxt);

  // Occupancy queries. The no-argument overloads are const; the per-context
  // overloads are not.
  bool full() const;
  bool empty() const;
  bool full(uint32_t context);
  bool empty(uint32_t context);

  bool stall_stores() const; // uniprocessor: stall front end
  bool smt_stall(uint32_t ctxt); // smt: stall fetch of specified context

  // allocate() takes a context and a timestamp and returns a size_t;
  // insert() takes a slot plus the store's address, data and size.
  // NOTE(review): presumably the value returned by allocate() is the slot
  // later passed to insert() -- confirm.
  size_t allocate(uint32_t context, uint64_t ts);
  void insert(size_t slot, uint32_t address, uint64_t data, uint32_t size);

  // Removal / recovery entry points, each keyed by context (and timestamp
  // where one is taken).
  void commit_head(uint32_t context, uint64_t ts);
  void flush_head_canceled(uint32_t ctxt);
  void clobber(uint32_t context, uint64_t ts);
  void clear_forwarding_to(uint32_t context);
  // st_ts is passed by non-const reference, so the callee may update it.
  uint8_t exposed(uint32_t st_addr, uint64_t& st_ts, uint8_t exposed_bits, uint64_t ld_ts);

  // store-forwarding functions
  void* get_paddr(uint32_t vaddr);
  void initiate_load(uint32_t vaddr, sparse_memory* the_mem, uint64_t instr_num);
  void forward_store(stq_packet& the_store);
  void forward_queue(uint32_t context, uint32_t addr, uint8_t size);

  // Typed load entry points. All share one parameter list and return the
  // loaded value as uint64_t; cache_access is passed by non-const reference.
  // No load_int64 is declared.
  // NOTE(review): presumably cache_access reports whether the load had to
  // access the cache rather than being satisfied by forwarding -- confirm.
  uint64_t load_int8(uint64_t instr_num, uint32_t vaddr, uint32_t context, uint8_t size, sparse_memory* the_mem, bool& cache_access);
  uint64_t load_uint8(uint64_t instr_num, uint32_t vaddr, uint32_t context, uint8_t size, sparse_memory* the_mem, bool& cache_access);
  uint64_t load_int16(uint64_t instr_num, uint32_t vaddr, uint32_t context, uint8_t size, sparse_memory* the_mem, bool& cache_access);
  uint64_t load_uint16(uint64_t instr_num, uint32_t vaddr, uint32_t context, uint8_t size, sparse_memory* the_mem, bool& cache_access);
  uint64_t load_int32(uint64_t instr_num, uint32_t vaddr, uint32_t context, uint8_t size, sparse_memory* the_mem, bool& cache_access);
  uint64_t load_uint32(uint64_t instr_num, uint32_t vaddr, uint32_t context, uint8_t size, sparse_memory* the_mem, bool& cache_access);
  uint64_t load_uint64(uint64_t instr_num, uint32_t vaddr, uint32_t context, uint8_t size, sparse_memory* the_mem, bool& cache_access);
};

// Load queue (ldq).
// Keeps ldq_packet entries in a pool_alloc and indexes them through an arb
// (address -> timestamp -> slot). It also holds a pointer to a store queue.
// NOTE(review): all member functions are defined out of line; any behavioral
// remark below that goes beyond the declaration itself is an assumption to be
// confirmed against the implementation.
class ldq {
private:
  // ldq data structure
  arb the_arb; // address/timestamp index (see the arb typedef)
  pool_alloc<ldq_packet> the_ldq; // storage for the load packets
  uint32_t ldq_size; // total slots in ldq
  uint32_t ld_cnt; // total loads in ldq
  stq* the_stq; // presumably set from the constructor's stqptr; ownership not visible here -- confirm

public:
  // size: presumably the number of slots (cf. ldq_size) -- confirm
  ldq(uint32_t size, stq* stqptr);

  void print(); // print all contexts
  void printCtxt(uint32_t ctxt); // print one context
  void printAddr(uint32_t addr); // print all loads to specified address (all contexts)
  void printARB();

  // Occupancy queries. The no-argument overloads are const; the per-context
  // overloads are not.
  bool full() const;
  bool empty() const;
  bool full(uint32_t ctxt);
  bool empty(uint32_t ctxt);

  bool stall_loads() const; // uniprocessor: stall front end
  bool smt_stall(uint32_t ctxt); // smt: stall fetch of specified context

  // allocate() takes a context and a timestamp and returns a size_t;
  // insert() takes a slot plus the load's address, size, pc, rob slot,
  // ratchk and previous pc; load_valid() takes a slot and a timestamp.
  // NOTE(review): presumably the value returned by allocate() is the slot
  // later passed to insert() and load_valid() -- confirm.
  size_t allocate(uint32_t context, uint64_t ts);
  void insert(size_t slot, uint32_t address, uint32_t size, uint32_t pc, size_t rob_slot, uint32_t ratchk, uint32_t prev_pc);
  bool load_valid(size_t slot, uint64_t ts);

  // Removal / recovery entry points, each keyed by context (and timestamp
  // where one is taken).
  void commit_head(uint32_t context, uint64_t ts);
  void flush_head_canceled(uint32_t ctxt);
  void clobber(uint32_t context, uint64_t ts);

  // Takes a store's address, size, context and timestamp; conf is passed by
  // non-const reference.
  // NOTE(review): presumably returns true when a load in the queue conflicts
  // with that store and hands the conflicting load back through conf -- confirm.
  bool conflict_check(uint32_t st_addr, uint32_t st_size, uint32_t st_ctext, uint64_t st_ts, ldq_packet& conf);
};

#endif /* LSQ_PF_H_GUARD */
