#ifndef SPARSE_MEMORY_H_GUARD
#define SPARSE_MEMORY_H_GUARD

#include <assert.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <new>
#include <vector>

#include "sim_endian.h"

// NOTE: a using-directive in a header leaks into every includer.  It is kept
// because code that includes this header may depend on it.
using namespace std;

// One cache line of simulated memory, held as a sequence of 32-bit words.
typedef std::vector<uint32_t> cacheline_t;

// Simulator-wide settings defined in another translation unit
// (command line variables).
extern uint32_t debug_mask;
extern uint32_t cache_line_size;

// Compile-time cache line size, and an all-zero line of that size.
#define CACHE_LINE_SIZE_BYTES 128 // in bytes
#define CACHE_LINE_INIT cacheline_t(CACHE_LINE_SIZE_BYTES / 4, 0)

class sparse_memory
{
private:
  // The memory will have a maximum size of 2^(PAGE_BITS + TABLE_BITS)
  // For reasonable performance you shouldn't use more than half that,
  // since the hashtable performance will degrade quickly.
  enum {
    PAGE_BITS = 15,
    PAGE_SIZE = (1 << PAGE_BITS),
    PAGE_MASK = (PAGE_SIZE - 1),
    TABLE_BITS = 15,             // Can NOT be larger than 16 for hashing to work
    TABLE_SIZE = (1 << TABLE_BITS),
    TABLE_MASK = (TABLE_SIZE - 1),
    MAGIC_NUMBER = 0x9e3779b9,
    EMPTY = (uint32_t)-1,
  };

  enum access_mode_e {
    READ_ONLY,
    READ_WRITE,
  };

  static uint32_t tag_of(uint32_t vaddr) {
    return (vaddr >> PAGE_BITS);
  }
  static uint32_t offset_of(uint32_t vaddr) {
    return (vaddr & ((1 << PAGE_BITS) - 1));
  }
  // we generate twice as many bits as we need:
  static uint32_t hash_of(uint32_t tag) {
    // This is from Knuth.  Basically we're multiplying by the golden
    // ratio, 0.61803398840129375. (Imagine that the radix point is
    // just to the left of the most significant bit.)
    return ((tag * MAGIC_NUMBER) >> (32 - (2 * TABLE_BITS)));
  }
  // we use the top half of the bits for the primary hash
  static uint32_t primary_hash_of(uint32_t hash0) {
    return (hash0 >> TABLE_BITS);
  }
  // and the bottom half we use for the secondary hash
  static uint32_t secondary_hash_of(uint32_t hash0) {
    // guarantee an odd number (so that the number is relatively prime
    // to TABLE_SIZE)
    return ((hash0 & TABLE_MASK) | 1);
  }

  bool find_key(uint32_t tag, int32_t& key) const {
    //if(debug_mask) printf("sparse_memory::find key of tag %x\n", tag);
    int32_t hash0 = hash_of(tag);

    // primary hash
    key = primary_hash_of(hash0);
    if (hashtab[key].tag == tag) return true;

    // secondary hash (after G. Knott)
    int32_t secondary = secondary_hash_of(hash0);
    while (hashtab[key].tag != EMPTY) {
      key = ((key + secondary) & TABLE_MASK);
      if (hashtab[key].tag == tag) return true;
    }

    // key not found, return key of page to create
    return false;
  }

  void copy_linked_page(int32_t key, int32_t linked_key);
  void compare_and_sync_with_linked();

  struct entry_t {
    uint32_t tag;
    uint32_t paddr;
  };
  entry_t hashtab[TABLE_SIZE];
  size_t page_count;
  const sparse_memory* linked_mem;

  // Algorithm: use open addressing with double hashing (no chaining).
  // This is from Knuth (vol. 3, sec. 6.4, algorithm D).
  template <class Action>
  void* lookup(uint32_t vaddr, access_mode_e protection) {
    //if(debug_mask) printf("sparse_memory::lookup vaddr %x\n", vaddr);

    uint32_t tag = tag_of(vaddr);
    int32_t key;
    if(find_key(tag, key)) return Action()(this, key, vaddr);

    // Check the linked memory and copy-on-write its page if it exists:
    if(linked_mem != NULL) {
      //if(debug_mask) printf("sparse_memory::lookup -> linked_mem access\n");
      int32_t link_key;
      if(linked_mem->find_key(tag,link_key)) {
        if(protection == READ_ONLY) return Action()(linked_mem, link_key, vaddr);
        else {
          ++page_count;
          copy_linked_page(key, link_key);
          return Action()(this, key, vaddr);
        }
      }
    }

    // This page has never been accessed before.  So allocate a new page.
    if (page_count > (TABLE_SIZE / 2)) {
      if (page_count > ((TABLE_SIZE * 3) / 4)) {
        fprintf(stderr, "error: sparse_memory out of hash entries\n");
        abort();
      }
      else {
        fprintf(stderr, "warning: sparse_memory close to running out of hash entries, continuing anyway\n");
      }
    }

    //if(debug_mask) printf("sparse_memory::lookup -> update page count\n");
    ++page_count;

    //woley-2005.09.23-this debug code is to ensure proper hashing, can be removed after
    //we are satisfied that the sparse_memory is robust
    assert(hashtab[key].tag == EMPTY);
    assert(hashtab[key].paddr == 0);

    //if(debug_mask) printf("sparse_memory::lookup tag: %x\n", tag);
    hashtab[key].tag = tag;
    hashtab[key].paddr = (uint32_t) (new char[PAGE_SIZE]);

    if (hashtab[key].paddr == 0) {
      fprintf(stderr, "OUT OF MEMORY IN imem::lookup()!!!!!!\n");
      abort();
    }

    // initialize newly created pages to zero
    for(uint32_t p = 0; p < PAGE_SIZE; p++)
      *((char *)hashtab[key].paddr + p) = 0;

    return Action()(this, key, vaddr);
  }

  /*friend*/ struct construct_address  {
    void* operator()(const sparse_memory* const  mem,
                     uint32_t key,
                     uint32_t vaddr) {
      return (void*)(mem->hashtab[key].paddr + offset_of(vaddr));
    }
  };

  void* get_paddr(uint32_t vaddr, access_mode_e readonly) {
    return lookup<construct_address>(vaddr, readonly);
  }

public:
  sparse_memory(const sparse_memory* x_lm = NULL) : 
    page_count(0),
    linked_mem(x_lm)
  {
    for (uint32_t i = 0; i < TABLE_SIZE; i++) {
      hashtab[i].tag = EMPTY;
      hashtab[i].paddr = 0;
    }
  }

  uint32_t get_page_count() const { return page_count; }
  void sync_identical_with_linked();
  void sync_with_linked();

  void print_stats() {
    uint32_t hash_entries = 0;
    for (uint32_t i = 0; i < TABLE_SIZE; ++i) {
      if (hashtab[i].tag != EMPTY) {
        ++hash_entries;
      }
    }
    uint32_t mem_size_kb = (page_count * PAGE_SIZE) / 1024;
    printf(" sparse_mem %p:: pages %u size in memory: %uKB\n", this, page_count, mem_size_kb);
    assert(hash_entries == page_count);
  }

  //this makes a copy of the sparse_memory, and returns a pointer to it
  sparse_memory *make_copy() {
    //if(debug_mask) printf("sparse_memory::make_copy()\n");
    sparse_memory *temp = new sparse_memory();
    temp->page_count = page_count;

    for (uint32_t i=0; i<TABLE_SIZE; i++) {
      
      temp->hashtab[i].tag = hashtab[i].tag;
      if (hashtab[i].tag != EMPTY) {
        temp->hashtab[i].paddr = (uint32_t) (new char[PAGE_SIZE]);

        if (temp->hashtab[i].paddr == 0) {
          fprintf(stderr, "OUT OF MEMORY IN imem::lookup()!!!!!!\n");
          abort();
        }

        for (uint32_t j=0; j<PAGE_SIZE; j++)
          *((char *)temp->hashtab[i].paddr + j) = *((char *)hashtab[i].paddr + j);
      } else {
        temp->hashtab[i].paddr = 0; //should be NULL, do this so compiler doesnt complain
      }
    }
    return temp;    

  }

  // replace the contents of this memory with that of another
  // (avoids the need to delete/reallocate space that can be reused)
  void replace_with(const sparse_memory& rhs) {
    //if(debug_mask) printf("sparse_memory::replace_with()\n");
    page_count = rhs.page_count;

    for (uint32_t i=0; i<TABLE_SIZE; i++) {
      if (hashtab[i].tag != EMPTY) {
        // we have the allocated space, deallocate or replace as needed
        if(rhs.hashtab[i].tag != EMPTY) {
          // replace contents if the rhs also has an entry at this location
          hashtab[i].tag = rhs.hashtab[i].tag;
          for (uint32_t j=0; j<PAGE_SIZE; j++)
            *((char *)hashtab[i].paddr + j) = *((char *)rhs.hashtab[i].paddr + j);
        }
        else {
          // deallocate the space, no-longer used
          hashtab[i].tag = EMPTY;
          delete [] (char *) hashtab[i].paddr;
          hashtab[i].paddr = 0; //should be NULL, do this so compiler doesnt complain
        }
      }
      else {
        // we do not have the allocated space
        if(rhs.hashtab[i].tag != EMPTY) {
          // allocate and fill
          hashtab[i].paddr = (uint32_t) (new char[PAGE_SIZE]);

          if (hashtab[i].paddr == 0) {
            fprintf(stderr, "OUT OF MEMORY IN imem::lookup()!!!!!!\n");
            abort();
          }

          hashtab[i].tag = rhs.hashtab[i].tag;
          for (uint32_t j=0; j<PAGE_SIZE; j++)
            *((char *)hashtab[i].paddr + j) = *((char *)rhs.hashtab[i].paddr + j);
        }
      }
    }
  }


  ~sparse_memory() {
    for (uint32_t i=0; i<TABLE_SIZE; ++i) {
      if (hashtab[i].tag != EMPTY)
        delete [] (char *) hashtab[i].paddr;
      else
        assert(hashtab[i].paddr == 0);
    }
  }

  
  // for testing
  void print_table();

  uint64_t load_int8(uint32_t vaddr) {
    int8_t* paddr = (int8_t*)get_paddr(vaddr, READ_ONLY);
    uint8_t data = *paddr;
    return ((int64_t)((int8_t)data));
  }
  uint64_t load_uint8(uint32_t vaddr) {
    uint8_t* paddr = (uint8_t*)get_paddr(vaddr, READ_ONLY);
    uint8_t data = *paddr;
    return ((uint64_t)data);
  }
  uint64_t load_int16(uint32_t vaddr) {
    uint16_t* paddr = (uint16_t*)get_paddr(vaddr, READ_ONLY);
    uint16_t memdata = *paddr;
    uint16_t data = CVT_ENDIAN_HWORD(memdata);
    return ((int64_t)((int16_t)data));
  }
  uint64_t load_uint16(uint32_t vaddr) {
    uint16_t* paddr = (uint16_t*)get_paddr(vaddr, READ_ONLY);
    uint16_t memdata = *paddr;
    uint16_t data = CVT_ENDIAN_HWORD(memdata);
    return ((uint64_t)data);
  }
  uint64_t load_int32(uint32_t vaddr) {
    uint32_t* paddr = (uint32_t*)get_paddr(vaddr, READ_ONLY);
    uint32_t memdata = *paddr;
    uint32_t data = CVT_ENDIAN_WORD(memdata);
    return ((int64_t)((int32_t)data));
  }
  uint64_t load_uint32(uint32_t vaddr) {
    uint32_t* paddr = (uint32_t*)get_paddr(vaddr, READ_ONLY);
    uint32_t memdata = *paddr;
    uint32_t data = CVT_ENDIAN_WORD(memdata);
    return ((uint64_t)data);
  }
  uint64_t load_uint64(uint32_t vaddr) {
    uint64_t* paddr = (uint64_t*)get_paddr(vaddr, READ_ONLY);
    uint64_t memdata = *paddr;
    uint64_t data = CVT_ENDIAN_DWORD(memdata);
    return ((uint64_t)data);
  }
  void store_uint8(uint8_t data, uint32_t vaddr) {
    uint8_t* paddr = (uint8_t*)get_paddr(vaddr, READ_WRITE);
    *paddr = data;
  }
  void store_uint16(uint16_t data, uint32_t vaddr) {
    uint16_t* paddr = (uint16_t*)get_paddr(vaddr, READ_WRITE);
    uint16_t memdata = CVT_ENDIAN_HWORD(data);
    *paddr = memdata;
  }
  void store_uint32(uint32_t data, uint32_t vaddr) {
    uint32_t* paddr = (uint32_t*)get_paddr(vaddr, READ_WRITE);
    uint32_t memdata = CVT_ENDIAN_WORD(data);
    *paddr = memdata;
  }
  void store_uint64(uint64_t data, uint32_t vaddr) {
    uint64_t* paddr = (uint64_t*)get_paddr(vaddr, READ_WRITE);
    uint64_t memdata = CVT_ENDIAN_WORD(data);
    *paddr = memdata;
  }

  // routines for cache line loading and storing
  cacheline_t load_cacheline(uint32_t vaddr) {
    cacheline_t cline(cache_line_size/4, 0);
    for(uint32_t i=0; i < cache_line_size/4; i++) {
      cline[i] = load_uint32(vaddr + (4 * i));
    }
    return cline;
  }

  void store_cacheline(cacheline_t cline, uint32_t vaddr) {
    for(uint32_t i=0; i < cache_line_size/4; i++)
      {
        uint32_t* paddr = (uint32_t*)get_paddr(vaddr, READ_WRITE);
        *paddr = cline[i];
        vaddr = vaddr + 4;
      }
  }


  // routines to move data between host and simulator
  void memcpy_to_host(void* dest, const uint32_t src, size_t n);
  void memcpy_from_host(uint32_t dest, const void* src, size_t n);
  void strcpy_to_host(char* dest, const uint32_t src);
  void strcpy_from_host(uint32_t dest, const char* src);

  // syscall interface
  void emulate_syscall(uint32_t cmd_addr);
};

#endif /* SPARSE_MEMORY_H_GUARD */
