/**
 * University of Illinois/NCSA
 * Open Source License
 *
 *  Copyright (c) 2007-2009,The Board of Trustees of the University of
 *  Illinois.  All rights reserved.
 *
 *  Copyright (c) 2009 Sam King, Jeffrey Young
 *
 *  Developed by:
 *
 *  Professor Sam King in the Department of Computer Science
 *  The University of Illinois at Urbana-Champaign
 *      http://www.cs.uiuc.edu/homes/kingst/Research.html
 *
 *  Jeffrey Young in the Department of Computer Science
 *  The University of Illinois at Urbana-Champaign
 *
 *       Permission is hereby granted, free of charge, to any person
 *       obtaining a copy of this software and associated
 *       documentation files (the "Software"), to deal with the
 *       Software without restriction, including without limitation
 *       the rights to use, copy, modify, merge, publish, distribute,
 *       sublicense, and/or sell copies of the Software, and to permit
 *       persons to whom the Software is furnished to do so, subject
 *       to the following conditions:
 *
 *          Redistributions of source code must retain the above
 *          copyright notice, this list of conditions and the
 *          following disclaimers.
 *
 *          Redistributions in binary form must reproduce the above
 *          copyright notice, this list of conditions and the
 *          following disclaimers in the documentation and/or other
 *          materials provided with the distribution.
 *
 *          Neither the names of Sam King, the University of Illinois,
 *          nor the names of its contributors may be used to endorse
 *          or promote products derived from this Software without
 *          specific prior written permission.
 *
 *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 *  NONINFRINGEMENT.  IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT
 *  HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 *  WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 *  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 *  DEALINGS WITH THE SOFTWARE.
*/
#include <assert.h>
#include <fcntl.h>
#include <signal.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <time.h>
#include <unistd.h>

#include <fstream>
#include <iostream>
#include <map>
#include <sstream>
#include <string>

using namespace std;

#ifndef MAP_ANONYMOUS
#define MAP_ANONYMOUS MAP_ANON
#endif

#define TCB_SIZE 4096
#define NUM_TCB_ENTRIES 128
#define RAM_SIZE 0x20000
#define NUM_REGS 32

#define OP_FORMAT_1     0x1
#define OP_FORMAT_2     0x0
#define OP_FORMAT_3_ALU 0x2
#define OP_FORMAT_3_MEM 0x3

#define OP2_BRANCH      0x2
#define OP2_SETHI       0x4

#define OP3_ADD         0x0
#define OP3_SUB         0x4
#define OP3_SUBCC       0x14
#define OP3_JMPL        0x38

#define OP3_LD          0x0
#define OP3_ST          0x4

#define COND_BNE        0x9
#define COND_B          0x8
#define COND_BE         0x1

/* When non-zero, the print_* helpers write each translated guest instruction to stdout. */
int PRINT_INST = 1;

/* Short aliases for the fixed-width integer types used throughout the file. */
typedef uint8_t u8;
typedef uint16_t u16;
typedef uint32_t u32;
typedef uint64_t u64;
typedef int32_t i32;

/* The two host scratch registers used by generated code (T0 -> eax, T1 -> ebx). */
typedef enum { T0, T1 } host_reg_t;

/**
 * Translation cache block: one buffer of generated host code together with
 * the guest pc range it was translated from.
 */
struct TCB {
    /* guest pc at which translation of this block started */
    u32 pc_enter;
    /* guest pc just past the last guest instruction compiled into this block */
    u32 pc_exit;
    /* number of bytes of generated code currently stored in buffer */
    unsigned int bytesInCache;
    /* capacity of buffer in bytes */
    unsigned int totalSize;
    /* mmap'ed read/write/execute memory holding the generated code */
    u8 *buffer;
};

/**
 * Guest CPU state.
 *
 * Generated code receives a pointer to this struct and accesses its fields
 * by byte offset, and saveState()/fillState() copy it to and from disk as
 * raw bytes, so the (packed) layout is part of the contract.
 */
struct CPU {
    /* block currently being generated or executed; NULL in between */
    struct TCB *tcb;
    /* host address of guest RAM, loaded by generated code for ld/st */
    void *ram;
    /* guest program counter */
    u32 pc;
    u32 branch_pc;      /* branch delay instructions currently ignored */
    /* guest registers; register 0 is never written by generated code */
    u32 regs[NUM_REGS];
    u32 icc_z;          /* at some point combine flags into single bit array */
    /* negative condition flag, written by subicc */
    u32 icc_n;
    /* set by a translator when the instruction it compiled ends the block */
    bool translationDone;
} __attribute__ ((__packed__));

/**
 * All fields that decode_control_signals() extracts from one 32-bit guest
 * instruction word. Every field is always filled in; which ones are
 * meaningful depends on the instruction format.
 */
struct control_signals {
    u32 op;         /* bits 31:30 */
    u32 rd;         /* bits 29:25 */
    u32 cond;       /* bits 28:25 */
    u32 op2;        /* bits 24:22 */
    u32 op3;        /* bits 24:19 */
    u32 rs1;        /* bits 18:14 */
    u32 i;          /* bit 13 */
    u32 asi;        /* bits 12:5 */
    i32 simm;       /* bits 12:0, sign-extended */
    u32 imm;        /* bits 21:0 */
    u32 disp22;     /* bits 21:0 */
    u32 disp30;     /* bits 29:0 */
    u32 rs2;        /* bits 4:0 */
    u32 raw;        /* the undecoded instruction word */
};

/* Host memory backing the guest RAM image (RAM_SIZE bytes, allocated in main). */
static uint8_t *ram = NULL;
/*
 * Set to non-zero to stop the execution loop. It is written from the SIGINT
 * handler, so it must be a volatile sig_atomic_t: that is the only object
 * type a signal handler may portably store to.
 */
static volatile sig_atomic_t userQuit = 0;
/* Direct-mapped translation cache, indexed by getTcbMapIdx(). */
struct TCB *tcbMap[NUM_TCB_ENTRIES];
/* Wall-clock time at which execution started; used to report total run time. */
static time_t startTime;

/**
 * SIGINT handler: requests that the execution loop stop by raising the
 * userQuit flag. The signal number is not used.
 */
void ctrlC(int) {
    userQuit = true;
}

/********************* Code generation functions *****************************/

void store_in_cache(struct TCB *tcb, void *translation, unsigned int size) {
    assert((tcb->bytesInCache + size) <= tcb->totalSize);
    memcpy(tcb->buffer + tcb->bytesInCache, translation, size);
    tcb->bytesInCache += size;
}

/**
 * Useful information:
 *
 *
 * The variables called "opcode" in gen_force_return() through
 * gen_load_T0_T1() below encode sequences of one or more x86 instructions.
 * Remember that each x86 instruction can be one or more bytes long.
 * See the Intel manual for reference:
 *  http://www.intel.com/Assets/PDF/manual/253666.pdf
 *  http://www.intel.com/Assets/PDF/manual/253667.pdf
 *
 * The following pages are particularly useful:
 *  Table A-2
 *  Table A-6
 *  Table 2-2
 *
 * First look at table A-2 to figure out what instruction a particular
 * byte encodes (e.g., 0x12 = row 1, column 2 of the table).
 *
 * When you see a superscript note in this table like 1A or similar, that means
 * the next byte of the instruction is encoded as described in tables A-6 and
 * 2-2.
 *
 * On the other hand, if the instruction doesn't take any further arguments,
 * that means the next byte is the beginning of another instruction.
 *
 *
 * Also keep in mind that x86 is little-endian, so bytes end up in memory in
 * the opposite order of what you see in an integer constant.  For example,
 *  u32 0x12345678
 * will be stored in memory as the data bytes
 *  0x78 0x56 0x34 0x12
 *
 *
 * In some cases, we use a u32/u64 to represent 3-byte and 6-byte instructions;
 * in those cases, it's important to tell store_in_cache() to store only the
 * bytes that belong to the instruction (the first 3 or the first 6).
 *
 *
 * Note: T0 -> eax
 *       T1 -> ebx
 *
 *       rdi holds pointer to cpu structure
 */


/********* code generation for load / store guest cpu state ***********/

void gen_load_T0(struct CPU *cpu, u16 offset) {
    u64 opcode = offset;
    assert(opcode < 65536);
    opcode <<= 16;
    opcode |= 0x878b;
    store_in_cache(cpu->tcb, &opcode, sizeof(opcode)-2);
}

void gen_load_T1(struct CPU *cpu, u16 offset) {
    u64 opcode = offset;
    assert(opcode < 65536);
    opcode <<= 16;
    opcode |= 0x9f8b;
    store_in_cache(cpu->tcb, &opcode, sizeof(opcode)-2);
}

void gen_store_T0(struct CPU *cpu, u16 offset) {
    u64 opcode = offset;
    assert(opcode < 65536);
    opcode <<= 16;
    opcode |= 0x8789;
    store_in_cache(cpu->tcb, &opcode, sizeof(opcode)-2);
}

void gen_store_T1(struct CPU *cpu, u16 offset) {
    u64 opcode = offset;
    assert(opcode < 65536);
    opcode <<= 16;
    opcode |= 0x9f89;
    store_in_cache(cpu->tcb, &opcode, sizeof(opcode)-2);
}

/**
 * Emit a load of the cpu-struct field at byte `offset` into the chosen
 * translation register. Asserts on an unknown register.
 */
void load_guest_T(struct CPU *cpu, host_reg_t t, u8 offset) {
    switch (t) {
        case T0:
            gen_load_T0(cpu, offset);
            return;
        case T1:
            gen_load_T1(cpu, offset);
            return;
        default:
            assert(false);
    }
}

/**
 * Emit a store of the chosen translation register to the cpu-struct field
 * at byte `offset`. Asserts on an unknown register.
 */
void store_guest_T(struct CPU *cpu, host_reg_t t, u8 offset) {
    switch (t) {
        case T0:
            gen_store_T0(cpu, offset);
            return;
        case T1:
            gen_store_T1(cpu, offset);
            return;
        default:
            assert(false);
    }
}

/**
 * Load a guest register from cpu struct into translation register
 * T0 or T1
 */
void gen_load_guest_reg(struct CPU *cpu, host_reg_t t, u32 regNum) {
    u8 reg_offset = ((u8 *) &cpu->regs[0]) - ((u8 *) cpu) + regNum*sizeof(cpu->regs[0]);
    load_guest_T(cpu, t, reg_offset);
}

/**
 * Store a guest register from translation register
 * T0 or T1 into cpu struct
 */
void gen_store_guest_reg(struct CPU *cpu, host_reg_t t, u32 regNum) {
    if (regNum == 0) {
        return;
    }
    u8 reg_offset = ((u8 *) &cpu->regs[0]) - ((u8 *) cpu) + regNum*sizeof(cpu->regs[0]);
    store_guest_T(cpu, t, reg_offset);
}

/**
 * Load the guest pc from the cpu struct into translation register
 * T0 or T1
 */
void gen_load_guest_pc(struct CPU *cpu, host_reg_t t) {
    u8 offset = ((u8 *) &cpu->pc) - ((u8 *) cpu);
    load_guest_T(cpu, t, offset);
}
/**
 * Store the guest pc from translation register
 * T0 or T1 into the cpu struct
 */
void gen_store_guest_pc(struct CPU *cpu, host_reg_t t) {
    u8 offset = ((u8 *) &cpu->pc) - ((u8 *) cpu);
    store_guest_T(cpu, t, offset);
}

/**
 * Load the guest zero flag from the cpu struct into translation register
 * T0 or T1
 */
void gen_load_guest_zero_flag(struct CPU *cpu, host_reg_t t) {
    u8 offset = ((u8 *) &cpu->icc_z) - ((u8 *) cpu);
    load_guest_T(cpu, t, offset);
}
/**
 * Store the guest zero flag from translation register
 * T0 or T1 into the cpu struct
 */
void gen_store_guest_zero_flag(struct CPU *cpu, host_reg_t t) {
    u8 offset = ((u8 *) &cpu->icc_z) - ((u8 *) cpu);
    store_guest_T(cpu, t, offset);
}
/**
 * Store the guest negative flag from translation register
 * T0 or T1 into the cpu struct
 */
void gen_store_guest_negative_flag(struct CPU *cpu, host_reg_t t) {
    u8 offset = ((u8 *) &cpu->icc_n) - ((u8 *) cpu);
    store_guest_T(cpu, t, offset);
}


/*******************************************************************/


/*********************** Code generation functions *****************/

/**
 * Used to put a native return instruction into the translation
 * cache.  Use this to transition out of translation code, back
 * into the simulator.
 */
void gen_force_return(struct CPU *cpu) {
    u32 opcode = 0xc3585b;
    store_in_cache(cpu->tcb, &opcode, sizeof(opcode) - 1);
}


/**
 * Load the address of the ram variable into translation register T1.
 * this is used for generating an address that can be used for 
 * guest memory access within translated code.
 */
void gen_load_ram_T1(struct CPU *cpu) {
    assert(sizeof(cpu->ram) == sizeof(u64));

    // note, we need to use the full 64bit value
    u64 opcode = ((u8 *) &cpu->ram) - ((u8 *) cpu);
    opcode <<= 24;
    opcode |= 0x9f8b48;
    store_in_cache(cpu->tcb, &opcode, sizeof(opcode) - 1);
}

void gen_alu_imm32(struct CPU *cpu, i32 imm, u64 opcode_lower) {
    u64 opcode = 0;
    opcode |= imm;
    opcode <<= 16;
    opcode |= opcode_lower;
    store_in_cache(cpu->tcb, &opcode, sizeof(opcode) - 2);
}

/**
 * stores an imm32 value into translation register T0
 */
void gen_mov_imm32_T0(struct CPU *cpu, int32_t imm) {
    gen_alu_imm32(cpu, imm, 0xc0c7);
}

/**
 * stores an imm32 value into translation register T1
 */
void gen_mov_imm32_T1(struct CPU *cpu, int32_t imm) {
    gen_alu_imm32(cpu, imm, 0xc3c7);
}

/**
 * adds translation registers T0 and T1 and stores the result in T1
 */
void gen_add_T0_T1(struct CPU *cpu) {
    u16 opcode = 0xc301;
    store_in_cache(cpu->tcb, &opcode, sizeof(opcode));
}

/**
 * adds translation registers T0 and T1 and stores the result in T1.
 * Note: this operates on 64 bit values and should only be used
 * for manipulating addresses used to access guest ram
 */
void gen_addq_T0_T1(struct CPU *cpu) {
    u32 opcode = 0xc30148;
    store_in_cache(cpu->tcb, &opcode, sizeof(opcode) - 1);
}

/**
 * add imm32 and T0 and store the result in T0.
 */
void gen_add_imm32_T0(struct CPU *cpu, int32_t imm) {
    gen_alu_imm32(cpu, imm, 0xc081);
}

/**
 * add imm32 and T1 and store the result in T1.
 */
void gen_add_imm32_T1(struct CPU *cpu, int32_t imm) {
    gen_alu_imm32(cpu, imm, 0xc381);
}

/**
 * sub imm32 and T0 and store the result in T0.
 */
void gen_sub_imm32_T0(struct CPU *cpu, int32_t imm) {
    gen_alu_imm32(cpu, imm, 0xe881);
}

/**
 * sub imm32 and T1 and store the result in T1.
 */
void gen_sub_imm32_T1(struct CPU *cpu, int32_t imm) {
    gen_alu_imm32(cpu, imm, 0xeb81);
}

/**
 * compare T0 and T1, will set host CPU flags appropriately
 */
void gen_cmp_T0_T1(struct CPU *cpu) {
    u16 opcode = 0xc339;
    store_in_cache(cpu->tcb, &opcode, sizeof(opcode));
}

/**
 * jump based on the results of a cmp instruction
 */
void gen_jne(struct CPU *cpu, u8 offset) {
    u16 opcode = offset;
    opcode <<= 8;
    opcode |= 0x75;
    store_in_cache(cpu->tcb, &opcode, sizeof(opcode));
}

/**
 * jump based on the results of a cmp instruction
 */
void gen_jns(struct CPU *cpu, u8 offset) {
    u16 opcode = offset;
    opcode <<= 8;
    opcode |= 0x79;
    store_in_cache(cpu->tcb, &opcode, sizeof(opcode));
}

/**
 * generate debugging trap, we used this instruction to signify the end of
 * a simulation run
 */
void gen_int3(struct CPU *cpu) {
    u8 opcode = 0xcc;
    store_in_cache(cpu->tcb, &opcode, sizeof(opcode));
}

/**
 * store T0 in address stored at T1
 */
void gen_store_T0_T1(struct CPU *cpu) {
    u16 opcode = 0x0389;
    store_in_cache(cpu->tcb, &opcode, sizeof(opcode));
}

/**
 * load from address stored at T1 into T0
 */
void gen_load_T0_T1(struct CPU *cpu) {
    u16 opcode = 0x038b;
    store_in_cache(cpu->tcb, &opcode, sizeof(opcode));
}

/****************************************************************************/

/**
 * Sign-extend a 13-bit field to 32 bits.
 *
 * @param imm  value whose bits above bit 12 must be zero (asserted)
 * @return     `imm` with bit 12 replicated into bits 13..31
 */
i32 sign_extend_13(u32 imm) {
    assert((imm & 0xffffe000) == 0);

    const bool negative = (imm & 0x01000) != 0;
    return negative ? (i32) (imm | 0xffffe000) : (i32) imm;
}

/**
 * Sign-extend a 22-bit field to 32 bits.
 *
 * @param disp22  value whose bits above bit 21 must be zero (asserted)
 * @return        `disp22` with bit 21 replicated into bits 22..31
 */
i32 sign_extend_22(u32 disp22) {
    assert((disp22 & 0xffc00000) == 0);

    const bool negative = (disp22 & 0x200000) != 0;
    return negative ? (i32) (disp22 | 0xffc00000) : (i32) disp22;
}

/**
 * Read the 32-bit guest instruction word at cpu->pc from guest RAM.
 *
 * Asserts that the pc is 4-byte aligned and that the whole word lies inside
 * the RAM_SIZE-byte guest image, so a guest that runs or branches past the
 * end of RAM stops here instead of reading unrelated host memory.
 *
 * @return the instruction word in host byte order
 */
u32 fetch(struct CPU *cpu) {
    assert((cpu->pc & 0x3) == 0);
    assert(cpu->pc <= RAM_SIZE - sizeof(u32));

    // copy the bytes out instead of reading the byte buffer through a u32 *
    u32 word;
    memcpy(&word, ram + cpu->pc, sizeof(word));
    return word;
}

/**
 * Split a 32-bit guest instruction word into every field any format uses.
 * All fields are extracted unconditionally; the caller picks the ones that
 * apply to the instruction's format.
 *
 * @param opcode   raw instruction word
 * @param control  receives the decoded fields
 */
void decode_control_signals(u32 opcode, struct control_signals *control) {
    const u32 word = opcode;

    control->raw = word;

    // fields listed from the most significant bit downwards
    control->op     = (word >> 30) & 0x3;
    control->rd     = (word >> 25) & 0x1f;
    control->cond   = (word >> 25) & 0xf;
    control->op2    = (word >> 22) & 0x7;
    control->op3    = (word >> 19) & 0x3f;
    control->rs1    = (word >> 14) & 0x1f;
    control->i      = (word >> 13) & 0x1;
    control->asi    = (word >> 5) & 0xff;
    control->rs2    = word & 0x1f;

    // immediates and displacements all start at bit 0
    control->simm   = sign_extend_13(word & 0x1fff);
    control->imm    = word & 0x3fffff;
    control->disp22 = word & 0x3fffff;
    control->disp30 = word & 0x3fffffff;
}

/**
 * Build the common prefix of a disassembly line: the address of the
 * instruction being translated (cpu->pc has already been advanced past it,
 * hence the -4), a colon, and the mnemonic followed by a space.
 */
string get_print_header(struct CPU *cpu, const char *inst) {
    ostringstream line;
    line << (void *) (cpu->pc - 4);
    line << ": " << inst << " ";
    return line.str();
}

/**
 * Same prefix as the two-argument overload, followed by the destination
 * register written as "r<rd>, ".
 */
string get_print_header(struct CPU *cpu, const char *inst, u32 rd) {
    const string prefix = get_print_header(cpu, inst);

    ostringstream line;
    line << prefix << "r" << rd << ", ";
    return line.str();
}

void print_rrr(struct CPU *cpu, const char *inst, u32 rd, u32 rs1, u32 rs2) {
    if (PRINT_INST) {
        cout << get_print_header(cpu, inst, rd) << "r" << rs1 << ", " << "r" << rs2 << endl;
    }
}

void print_rri(struct CPU *cpu, const char *inst, u32 rd, u32 rs1, i32 signedImm) {
    if (PRINT_INST) {
        cout << get_print_header(cpu, inst, rd) << "r" << rs1 << ", " << signedImm << endl;
    }
}

void print_ri(struct CPU *cpu, const char *inst, u32 rd, u32 imm) {
    if (PRINT_INST) {
        cout << get_print_header(cpu, inst, rd) << imm << endl;
    }
}

void print_i(struct CPU *cpu, const char *inst, u32 imm) {
    if (PRINT_INST) {
        cout << get_print_header(cpu, inst) << imm << endl;
    }
}


/**************** Guest instruction translation functions ***************/
/**
 * Translate the register form of the guest add instruction: emits host
 * code that computes regs[rd] = regs[rs1] + regs[rs2].
 */
void add(struct CPU *cpu, u32 rd, u32 rs1, u32 rs2) {
    print_rrr(cpu, "add", rd, rs1, rs2);
    gen_load_guest_reg(cpu, T0, rs1);
    gen_load_guest_reg(cpu, T1, rs2);
    // the sum is left in T1
    gen_add_T0_T1(cpu);
    gen_store_guest_reg(cpu, T1, rd);
}

/**
 * Translate the immediate form of the guest add instruction: emits host
 * code that computes regs[rd] = regs[rs1] + simm.
 */
void addi(struct CPU *cpu, u32 rd, u32 rs1, i32 simm) {
    print_rri(cpu, "addi", rd, rs1, simm);
    gen_load_guest_reg(cpu, T0, rs1);
    gen_add_imm32_T0(cpu, simm);
    gen_store_guest_reg(cpu, T0, rd);
}

/**
 * Translate the immediate form of the guest sub instruction: emits host
 * code that computes regs[rd] = regs[rs1] - simm.
 */
void subi(struct CPU *cpu, u32 rd, u32 rs1, i32 simm) {
    print_rri(cpu, "subi", rd, rs1, simm);
    gen_load_guest_reg(cpu, T0, rs1);
    gen_sub_imm32_T0(cpu, simm);
    gen_store_guest_reg(cpu, T0, rd);
}

/**
 * Translate the immediate form of the guest subcc instruction: emits host
 * code that computes regs[rd] = regs[rs1] - simm and sets the guest zero
 * and negative flags (icc_z, icc_n) to 1 or 0 according to the result.
 *
 * The emitted sequence depends on the host flags produced by the subtract
 * surviving until the two conditional jumps, and on each guest flag store
 * being exactly 6 bytes long (the jump distances are hard-coded).
 */
void subicc(struct CPU *cpu, u32 rd, u32 rs1, i32 simm) {
    print_rri(cpu, "subcci", rd, rs1, simm);

    // clear flags
    gen_mov_imm32_T1(cpu, 0);
    gen_store_guest_zero_flag(cpu, T1);
    gen_store_guest_negative_flag(cpu, T1);

    // calculate result of subtraction
    gen_load_guest_reg(cpu, T0, rs1);
    gen_sub_imm32_T0(cpu, simm);

    // set flags
    gen_mov_imm32_T1(cpu, 1);

    // set zero flag if necessary
    // (result non-zero: jump over the 6-byte store below)
    gen_jne(cpu, 6);
    gen_store_guest_zero_flag(cpu, T1);

    // set negative flag if necessary
    // (result non-negative: jump over the 6-byte store below)
    gen_jns(cpu, 6);
    gen_store_guest_negative_flag(cpu, T1);

    // store result
    gen_store_guest_reg(cpu, T0, rd);
}

/**
 * Translate the guest sethi instruction: emits host code that sets
 * regs[rd] = imm << 10, i.e. the 22-bit immediate placed in the upper bits
 * with the low 10 bits zero.
 */
void sethi(struct CPU *cpu, u32 rd, u32 imm) {
    print_ri(cpu, "sethi", rd, imm);
    gen_mov_imm32_T0(cpu, (imm << 10));
    gen_store_guest_reg(cpu, T0, rd);
}


/**
 * Translate the immediate form of the guest ld instruction: emits host
 * code that loads the 32-bit word at guest address regs[rs1] + simm into
 * regs[rd].
 *
 * NOTE(review): no range check on the guest address is emitted, so the
 * generated code relies on the guest staying inside its RAM image.
 */
void ld(struct CPU *cpu, u32 rd, u32 rs1, i32 simm) {
    print_rri(cpu, "ld", rd, rs1, simm);
    gen_load_guest_reg(cpu, T0, rs1);
    gen_add_imm32_T0(cpu, simm);

    // turn the guest address into a host address
    // (T1 = host base of guest ram, then a 64-bit T1 += T0)
    gen_load_ram_T1(cpu);
    gen_addq_T0_T1(cpu);

    gen_load_T0_T1(cpu);
    gen_store_guest_reg(cpu, T0, rd);
}

/**
 * Translate the immediate form of the guest st instruction: emits host
 * code that stores regs[rd] to the 32-bit word at guest address
 * regs[rs1] + simm.
 *
 * NOTE(review): no range check on the guest address is emitted, so the
 * generated code relies on the guest staying inside its RAM image.
 */
void st(struct CPU *cpu, u32 rd, u32 rs1, i32 simm) {
    print_rri(cpu, "st", rd, rs1, simm);
    gen_load_guest_reg(cpu, T0, rs1);
    gen_add_imm32_T0(cpu, simm);

    // turn the guest address into a host address
    // (T1 = host base of guest ram, then a 64-bit T1 += T0)
    gen_load_ram_T1(cpu);
    gen_addq_T0_T1(cpu);

    // T0 is free again once the address is in T1; reuse it for the value
    gen_load_guest_reg(cpu, T0, rd);
    gen_store_T0_T1(cpu);
}

/**
 * Translate the guest bne instruction and end the translation block.
 *
 * The emitted code tests the guest zero flag. When it is 0 the guest pc is
 * set to (pc - 4) + 4 * sign_extend(disp22); pc at run time holds the
 * address following the branch, so pc - 4 is the branch itself. When the
 * flag is non-zero the pc update is skipped. Either way the code then
 * returns to the simulator.
 */
void bne(struct CPU *cpu, u32 disp22) {
    print_i(cpu, "bne", 4*sign_extend_22(disp22));
    gen_load_guest_zero_flag(cpu, T0);
    gen_mov_imm32_T1(cpu, 0);
    gen_cmp_T0_T1(cpu);
    // 0x1A = 26 bytes = the five instructions below (6 + 6 + 6 + 2 + 6);
    // the jump lands on the return sequence
    gen_jne(cpu, 0x1A);
    gen_mov_imm32_T0(cpu, 4*sign_extend_22(disp22));
    gen_load_guest_pc(cpu, T1);
    gen_sub_imm32_T1(cpu, 4);
    gen_add_T0_T1(cpu);
    gen_store_guest_pc(cpu, T1);
    gen_force_return(cpu);

    cpu->translationDone = true;
}

/**
 * Translate the guest be instruction and end the translation block.
 *
 * The emitted code tests the guest zero flag. When it equals 1 the guest
 * pc is set to (pc - 4) + 4 * sign_extend(disp22); pc at run time holds the
 * address following the branch, so pc - 4 is the branch itself. Otherwise
 * the pc update is skipped. Either way the code then returns to the
 * simulator.
 */
void be(struct CPU *cpu, u32 disp22) {
    print_i(cpu, "be", 4*sign_extend_22(disp22));
    gen_load_guest_zero_flag(cpu, T0);
    gen_mov_imm32_T1(cpu, 1);
    gen_cmp_T0_T1(cpu);
    // 0x1A = 26 bytes = the five instructions below (6 + 6 + 6 + 2 + 6);
    // the jump lands on the return sequence
    gen_jne(cpu, 0x1A);
    gen_mov_imm32_T0(cpu, 4*sign_extend_22(disp22));
    gen_load_guest_pc(cpu, T1);
    gen_sub_imm32_T1(cpu, 4);
    gen_add_T0_T1(cpu);
    gen_store_guest_pc(cpu, T1);
    gen_force_return(cpu);

    cpu->translationDone = true;
}

/**
 * Translate the guest unconditional branch and end the translation block.
 *
 * The emitted code sets the guest pc to (pc - 4) + 4 * sign_extend(disp22),
 * where pc - 4 is the address of the branch instruction, and returns to
 * the simulator.
 */
void b(struct CPU *cpu, u32 disp22) {
    print_i(cpu, "ba", 4*sign_extend_22(disp22));
    gen_mov_imm32_T0(cpu, 4*sign_extend_22(disp22));
    gen_load_guest_pc(cpu, T1);
    gen_sub_imm32_T1(cpu, 4);
    gen_add_T0_T1(cpu);
    gen_store_guest_pc(cpu, T1);
    gen_force_return(cpu);

    cpu->translationDone = true;
}

/**
 * Translate the immediate form of the guest jmpl instruction and end the
 * translation block.
 *
 * The emitted code copies the guest pc into regs[rd], sets the guest pc to
 * regs[rs1] + simm, and returns to the simulator. The pc value saved is the
 * one stored in the cpu struct when the block runs, which jump_to_tc() sets
 * to the address following this instruction.
 */
void jmpli(struct CPU *cpu, u32 rd, u32 rs1, i32 simm) {
    print_rri(cpu, "jmpl", rd, rs1, simm);

    // save current pc to link register
    gen_load_guest_pc(cpu, T0);
    gen_store_guest_reg(cpu, T0, rd);

    // load new address into pc
    gen_load_guest_reg(cpu, T1, rs1);
    gen_add_imm32_T1(cpu, simm);
    gen_store_guest_pc(cpu, T1);

    gen_force_return(cpu);

    cpu->translationDone = true;
}

/**
 * Translate the guest call instruction and end the translation block.
 *
 * The emitted code stores the address of the call instruction (pc - 4) in
 * guest register 15, sets the guest pc to that address plus disp30 << 2,
 * and returns to the simulator.
 */
void call(struct CPU *cpu, u32 disp30) {
    print_i(cpu, "call", disp30<<2);

    // save pc of call instruction to r15
    gen_load_guest_pc(cpu, T0);
    gen_sub_imm32_T0(cpu, 4);
    gen_store_guest_reg(cpu, T0, 15);

    // update pc with new address
    // (T0 still holds the address of the call instruction)
    gen_mov_imm32_T1(cpu, disp30<<2);
    gen_add_T0_T1(cpu);
    gen_store_guest_pc(cpu, T1);
    gen_force_return(cpu);

    cpu->translationDone = true;
}

/**
 * Report an instruction the translator does not support: prints the op,
 * op2 and op3 fields and the current pc in hexadecimal, then asserts.
 */
void unknown_inst(struct CPU *cpu, struct control_signals *control) {
    // hex is sticky on the stream, so selecting it once covers every value
    cout << hex;
    cout << "op = 0x" << control->op << endl;
    cout << "op2 = 0x" << control->op2 << endl;
    cout << "op3 = 0x" << control->op3 << endl;
    cout << "pc = 0x" << cpu->pc << endl;
    assert(false);
}

void process_format_1(struct CPU *cpu, struct control_signals *control) {
    assert(control->op == OP_FORMAT_1);
    call(cpu, control->disp30);
}

/**
 * Translate a format 2 instruction: sethi, or one of the supported
 * branches (b, be, bne) selected by the cond field. Anything else is
 * handed to unknown_inst().
 */
void process_format_2(struct CPU *cpu, struct control_signals *control) {
    assert(control->op == OP_FORMAT_2);

    if (control->op2 == OP2_SETHI) {
        sethi(cpu, control->rd, control->imm);
        return;
    }

    if (control->op2 != OP2_BRANCH) {
        unknown_inst(cpu, control);
        return;
    }

    // branch: pick the translator from the condition code
    if (control->cond == COND_B) {
        b(cpu, control->disp22);
    } else if (control->cond == COND_BE) {
        be(cpu, control->disp22);
    } else if (control->cond == COND_BNE) {
        bne(cpu, control->disp22);
    } else {
        unknown_inst(cpu, control);
    }
}

void process_format_3_alu(struct CPU *cpu, struct control_signals *control) {
    assert(control->op == OP_FORMAT_3_ALU);
    switch(control->op3)
    {
        case OP3_ADD:
            if(control->i == 0) {
                add(cpu, control->rd, control->rs1, control->rs2);
            }
            else {
                addi(cpu, control->rd, control->rs1, control->simm);
            }
            break;
        case OP3_SUB:
            assert(control->i == 1);
            subi(cpu, control->rd, control->rs1, control->simm);
            break;
        case OP3_SUBCC:
            assert(control->i == 1);
            subicc(cpu, control->rd, control->rs1, control->simm);
            break;
        case OP3_JMPL:
            assert(control->i == 1);
            jmpli(cpu, control->rd, control->rs1, control->simm);
            break;
        default:
            unknown_inst(cpu, control);
    }
}

void process_format_3_mem(struct CPU *cpu, struct control_signals *control) {
    assert(control->op == OP_FORMAT_3_MEM);
    switch(control->op3)
    {
        case OP3_LD:
            assert(control->i == 1);
            ld(cpu, control->rd, control->rs1, control->simm);
            break;
        case OP3_ST:
            assert(control->i == 1);
            st(cpu, control->rd, control->rs1, control->simm);
            break;
        default:
            unknown_inst(cpu, control);
    }
}

bool compile(struct CPU *cpu, struct control_signals *control) {
    cpu->pc += sizeof(u32);

    cpu->translationDone = false;
    switch (control->op) {
        case OP_FORMAT_1:
            process_format_1(cpu, control);
            break;
        case OP_FORMAT_2:
            process_format_2(cpu, control);
            break;
        case OP_FORMAT_3_ALU:
            process_format_3_alu(cpu, control);
            break;
        case OP_FORMAT_3_MEM:
            process_format_3_mem(cpu, control);
            break;
        default:
            unknown_inst(cpu, control);
    }

    return cpu->translationDone;
}

/********************** Translation cache handling ************************/

// tcbMap is a direct-mapped table from guest instruction address to
// translated code.  guest instructions are 4 bytes long and aligned, so the
// two lowest pc bits carry no information and are dropped before the
// address is reduced to a table index.
int getTcbMapIdx(u32 pc_enter) {
    const u32 instruction_number = pc_enter >> 2;
    return instruction_number % NUM_TCB_ENTRIES;
}

/**
 * Install a translation block in the slot selected by its entry pc. A
 * block already occupying that slot is evicted: its code buffer is
 * unmapped and the block itself is deleted.
 */
void add_to_translation_cache(struct TCB *tcb) {
    struct TCB *&slot = tcbMap[getTcbMapIdx(tcb->pc_enter)];

    struct TCB *evicted = slot;
    if (evicted != NULL) {
        munmap(evicted->buffer, evicted->totalSize);
        delete evicted;
    }

    slot = tcb;
}

/**
 * Tell whether the translation cache holds a block whose entry pc is
 * exactly `pc`.
 */
bool in_translation_cache(u32 pc) {
    const struct TCB *entry = tcbMap[getTcbMapIdx(pc)];
    return entry != NULL && entry->pc_enter == pc;
}

/**
 * Return the cached translation block that starts at `pc`. The block must
 * be present (asserted); check with in_translation_cache() first.
 */
struct TCB *lookup_in_translation_cache(u32 pc) {
    assert(in_translation_cache(pc));

    const int slot = getTcbMapIdx(pc);
    return tcbMap[slot];
}

/***************************************************************************/

/**
 * Translate the guest code starting at cpu->pc into a new translation
 * block and install it in the translation cache.
 *
 * Guest instructions are fetched, decoded and compiled one after another
 * until compile() reports that the block is finished. On return cpu->pc is
 * back at the block's entry address, so the caller finds the new block in
 * the cache on its next lookup.
 *
 * If the executable buffer cannot be mapped, the error is reported and the
 * process exits with a failure status.
 */
void translate(struct CPU *cpu) {
    struct control_signals control;
    struct TCB *tcb = new struct TCB;

    tcb->bytesInCache = 0;
    tcb->buffer = (u8 *) mmap(NULL, TCB_SIZE,
                              PROT_READ | PROT_WRITE | PROT_EXEC,
                              MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (tcb->buffer == MAP_FAILED) {
        perror("mmap");
        // this is a fatal error, so do not report success to the caller's shell
        exit(EXIT_FAILURE);
    }
    tcb->totalSize = TCB_SIZE;
    tcb->pc_enter = cpu->pc;

    add_to_translation_cache(tcb);

    cpu->tcb = tcb;

    // block prologue: push rax, push rbx (bytes 50 53); the matching pops
    // are emitted by gen_force_return()
    u32 opcode = 0x5350;
    store_in_cache(tcb, &opcode, 2);

    do {
        opcode = fetch(cpu);
        decode_control_signals(opcode, &control);
    } while (!compile(cpu, &control));

    // make sure to restore the PC state before jumping back since this changes
    // during compilation
    tcb->pc_exit = cpu->pc;
    cpu->pc = tcb->pc_enter;
}

void jump_to_tc(struct CPU *cpu) {
    struct TCB *tcb = lookup_in_translation_cache(cpu->pc);
    cpu->tcb = tcb;
    void (*foo) (struct CPU *) = (void (*)(struct CPU *)) cpu->tcb->buffer;
    cpu->pc = tcb->pc_exit;
    foo(cpu);

    // we just got back from a TC, let's do a little checking
    cpu->tcb = NULL;
    assert(cpu->regs[0] == 0);
    u32 ramValue = *((u32 *) (ram + RAM_SIZE - sizeof(u32)));
    if (ramValue != 0) {
        cout << ramValue << endl;
        if (ramValue == 3524578) {
            cout << "totalTime = " << time(NULL) - startTime << endl;
            userQuit = true;
        }
        *((u32 *) (ram + RAM_SIZE - sizeof(u32))) = 0;
    }
}

/**
 * Write `str` to stdout, flush it, and return the next
 * whitespace-delimited token read from stdin.
 */
string prompt(const char *str) {
    cout << str << flush;

    string answer;
    cin >> answer;
    return answer;
}

/**
 * Fill `buf` with exactly `size` bytes read from the start of a file.
 *
 * read() may return fewer bytes than requested, so it is called in a loop
 * until the buffer is full. Asserts if `size` is not positive, the file
 * cannot be opened, a read fails, or the file holds fewer than `size`
 * bytes.
 *
 * @param fileName  path of the file to read
 * @param buf       destination buffer of at least `size` bytes
 * @param size      number of bytes to read; must be > 0
 */
void fillState(const char *fileName, void *buf, int size) {
    assert(size > 0);

    int fd = open(fileName, O_RDONLY);
    assert(fd >= 0);

    char *dest = (char *) buf;
    int total = 0;
    while (total < size) {
        ssize_t got = read(fd, dest + total, size - total);
        if (got <= 0) {
            // read error or end of file before `size` bytes arrived
            break;
        }
        total += (int) got;
    }
    assert(total == size);

    close(fd);
}

/**
 * Write exactly `size` bytes from `buf` to a file, creating it (mode 0644)
 * or truncating it if it already exists.
 *
 * write() may accept fewer bytes than requested, so it is called in a loop
 * until everything is written. Asserts if `size` is not positive, the file
 * cannot be opened, a write fails, or close() reports an error.
 *
 * @param fileName  path of the file to write
 * @param buf       source buffer of at least `size` bytes
 * @param size      number of bytes to write; must be > 0
 */
void saveState(const char *fileName, void *buf, int size) {
    assert(size > 0);

    int fd = open(fileName, O_WRONLY | O_TRUNC | O_CREAT, 0644);
    assert(fd >= 0);

    const char *src = (const char *) buf;
    int total = 0;
    while (total < size) {
        ssize_t put = write(fd, src + total, size - total);
        if (put <= 0) {
            // write error; the assert below reports it
            break;
        }
        total += (int) put;
    }
    assert(total == size);

    // a failed close on a written file can mean the data never reached it
    int rc = close(fd);
    assert(rc == 0);
    (void) rc;
}

/**
 * Main execution loop. Until userQuit is raised, each iteration either runs
 * the cached translation for the current guest pc or, when none exists,
 * translates the code at that pc so the next iteration can run it.
 *
 * Once the loop ends the user is asked whether to save the system state;
 * on "y" the guest RAM and the cpu struct are written to the two files
 * named by the user.
 */
void cpu_exec(struct CPU *cpu) {
    for (;;) {
        if (userQuit) {
            break;
        }

        if (!in_translation_cache(cpu->pc)) {
            // nothing cached for this pc yet: generate native code for it
            translate(cpu);
            continue;
        }
        jump_to_tc(cpu);
    }

    // the loop only ends once userQuit has been raised
    if (prompt("would you like to save your system state (y/n)?: ") != "y") {
        return;
    }

    string memFileName = prompt("mem file name: ");
    string cpuFileName = prompt("cpu file name: ");
    saveState(memFileName.c_str(), ram, RAM_SIZE);
    saveState(cpuFileName.c_str(), cpu, sizeof(struct CPU));
}

/**
 * Entry point.
 *
 * Usage: <program> memory_file [cpu_file]
 *
 * Installs the SIGINT handler, clears the translation cache, allocates
 * guest RAM and a cpu struct with default state, loads the memory image
 * (and optionally a saved cpu state) from disk, and runs the execution
 * loop.
 *
 * @return 0 on normal exit, -1 when no memory file was given
 */
int main(int argc, const char *argv[]) {
    if (argc < 2) {
        cerr << "Usage: " << argv[0] << " memory_file [cpu_file]" << endl;
        return -1;
    }

    signal(SIGINT, ctrlC);

    for (int idx = 0; idx < NUM_TCB_ENTRIES; idx++) {
        tcbMap[idx] = NULL;
    }

    // initialize our state
    ram = new uint8_t[RAM_SIZE];
    struct CPU *cpu = new struct CPU;
    for (int idx = 0; idx < NUM_REGS; idx++) {
        cpu->regs[idx] = 0;
    }
    cpu->tcb = NULL;
    cpu->pc = 0;
    cpu->branch_pc = (u32) -1;
    cpu->ram = ram;
    cpu->translationDone = false;
    cpu->icc_z = false;
    cpu->icc_n = false;
    cpu->regs[14] = RAM_SIZE-4-120;     // setup the stack pointer
    cpu->regs[8] = RAM_SIZE-4;          // setup the fib program

    // fetch our memory image and cpu state (if set)
    fillState(argv[1], ram, RAM_SIZE);
    if (argc >= 3) {
        fillState(argv[2], cpu, sizeof(struct CPU));

        // The saved cpu image is a raw copy of the struct, so it carries
        // host pointers from the process that wrote it. Translated code
        // dereferences cpu->ram, so re-point the host-side fields at this
        // process's state instead of trusting the stale values.
        cpu->ram = ram;
        cpu->tcb = NULL;
        cpu->translationDone = false;
    }
    startTime = time(NULL);
    cpu_exec(cpu);

    return 0;
}