/***************************************************************************
 *   Copyright (C) 2021 PCSX-Redux authors                                 *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, write to the                         *
 *   Free Software Foundation, Inc.,                                       *
 *   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.           *
 ***************************************************************************/

#include "regAllocation.h"

#include <cassert>

#include "recompiler.h"

#if defined(DYNAREC_X86_64)

// Map the guest register corresponding to the index to a host register
// Used internally by the allocateReg functions. Don't use it directly
template <DynaRecCPU::LoadingMode mode>
void DynaRecCPU::reserveReg(int index) {
    const auto regToAllocate = allocateableRegisters[m_allocatedRegisters];  // Fetch the next host reg to be allocated
    m_gprs[index].allocatedReg = regToAllocate;
    m_gprs[index].markUnknown();     // Mark the register's value as unknown if it were previously const propagated
    m_gprs[index].allocated = true;  // Mark register as allocated
    m_gprs[index].allocatedRegIndex = m_allocatedRegisters;

    // For certain instructions like loads, we don't want to load the reg because it'll get instantly overwritten
    if constexpr (mode == LoadingMode::Load) {
        gen.mov(regToAllocate, dword[contextPointer + GPR_OFFSET(index)]);  // Load reg
    }
    m_hostRegs[m_allocatedRegisters].mappedReg = index;
    m_allocatedRegisters++;  // Advance our register allcoator
}

// Write all cached guest register state back to the guest context and reset the allocator.
// Constant-propagated registers get their known value stored and become unknown; allocated
// registers are released, and stored back only if they were marked for writeback.
// Guest register 0 is not visited by this function.
void DynaRecCPU::flushRegs() {
    for (int guestReg = 1; guestReg < 32; ++guestReg) {
        auto& state = m_gprs[guestReg];

        if (state.isConst()) {
            // Known constant: store the immediate directly, then forget it
            gen.mov(dword[contextPointer + GPR_OFFSET(guestReg)], state.val);
            state.markUnknown();
            continue;
        }

        if (!state.isAllocated()) {
            continue;  // Neither constant nor cached in a host register: nothing to do
        }

        // Cached in a host register: release it, storing the value first if it is dirty
        state.allocated = false;
        if (state.writeback) {
            gen.mov(dword[contextPointer + GPR_OFFSET(guestReg)], state.allocatedReg);
            state.writeback = false;
        }
    }

    // Drop every host slot -> guest register mapping
    for (int slot = 0; slot < ALLOCATEABLE_REG_COUNT; ++slot) {
        m_hostRegs[slot].mappedReg = std::nullopt;
    }

    m_allocatedRegisters = 0;  // Allocation restarts from the first slot
}

// Release the volatile host registers (slots from ALLOCATEABLE_NON_VOLATILE_COUNT upwards) ahead of a
// call to a C++ function, storing their values back to the guest context where writeback is pending.
void DynaRecCPU::prepareForCall() {
    // No volatile slot has been handed out yet: nothing to spill
    if (m_allocatedRegisters <= ALLOCATEABLE_NON_VOLATILE_COUNT) {
        return;
    }

    for (auto slot = ALLOCATEABLE_NON_VOLATILE_COUNT; slot < m_allocatedRegisters; slot++) {
        auto& mapping = m_hostRegs[slot].mappedReg;
        if (!mapping) {
            continue;  // Slot is no longer bound to a guest register
        }

        const auto guestIndex = mapping.value();
        auto& guest = m_gprs[guestIndex];

        if (guest.writeback) {  // Dirty: store the host register into the guest context
            gen.mov(dword[contextPointer + GPR_OFFSET(guestIndex)], allocateableRegisters[slot]);
            guest.writeback = false;
        }

        // Break the binding in both directions
        guest.allocated = false;
        mapping = std::nullopt;
    }

    // Every volatile slot is free again, so rewind the allocator to the first volatile slot.
    // This hands the freed registers out again after each C++ call instead of leaving them unused
    // until the cache overflows, which means fewer full spills.
    m_allocatedRegisters = ALLOCATEABLE_NON_VOLATILE_COUNT;
}

// Used when our register cache overflows. Spill the entirety of it to host registers.
void DynaRecCPU::spillRegisterCache() {
    for (auto i = 0; i < m_allocatedRegisters; i++) {
        if (m_hostRegs[i].mappedReg) {  // Check if the register is still allocated to a guest register
            const auto previous = m_hostRegs[i].mappedReg.value();  // Get the reg it's allocated to

            if (m_gprs[previous].writeback) {  // Spill to guest register if writeback is enabled and disable writeback
                gen.mov(dword[contextPointer + GPR_OFFSET(previous)], allocateableRegisters[i]);
                m_gprs[previous].writeback = false;
            }

            m_hostRegs[i].mappedReg = std::nullopt;  // Unallocate it
            m_gprs[previous].allocated = false;
        }
    }

    m_allocatedRegisters = 0;  // Nothing is allocated anymore
}

// Make sure guest register `reg` lives in a host register, emitting a load of its current value
// from the guest context if it was not already allocated. Spills the whole register cache first
// when no free slot is left.
void DynaRecCPU::allocateReg(int reg) {
    if (m_gprs[reg].isAllocated()) {
        return;  // Already cached in a host register
    }

    const bool cacheIsFull = m_allocatedRegisters >= ALLOCATEABLE_REG_COUNT;
    if (cacheIsFull) {
        spillRegisterCache();
    }

    reserveReg<LoadingMode::Load>(reg);
}

// Make sure guest register `reg` has a host register assigned, without emitting a load of its
// current value. Spills the whole register cache first when no free slot is left.
// Writeback is not enabled here.
void DynaRecCPU::allocateRegWithoutLoad(int reg) {
    if (m_gprs[reg].isAllocated()) {
        return;  // Already cached in a host register
    }

    const bool cacheIsFull = m_allocatedRegisters >= ALLOCATEABLE_REG_COUNT;
    if (cacheIsFull) {
        spillRegisterCache();
    }

    reserveReg<LoadingMode::DoNotLoad>(reg);
}

// Allocate host registers for a whole group of guest registers at once.
// T: Number of regs without writeback we must allocate
// U: Number of regs with writeback we must allocate
// We want both of them to be compile-time constants for efficiency
//
// regsWithoutWb: guest registers that are read; their value is loaded from the guest context when
//                they are not already allocated.
// regsWithWb:    guest registers that are written; they are allocated without a load (unless they
//                also appear in regsWithoutWb) and marked for writeback.
// A register may appear in both lists or several times; it is only allocated once.
template <int T, int U>
void DynaRecCPU::allocateRegisters(std::array<int, T> regsWithoutWb, std::array<int, U> regsWithWb) {
    static_assert(T + U < ALLOCATEABLE_REG_COUNT, "Trying to allocate too many registers");

    // One bit per guest register. The shift is done on an unsigned value so that register 31
    // does not shift into the sign bit of an int.
    const auto bitFor = [](int reg) { return uint32_t{1} << reg; };

    // Runs at most twice: if the request does not fit, the cache is flushed and the bookkeeping
    // is recomputed against the now-empty cache.
    for (;;) {
        // Which specific regs we need to load
        uint32_t regsToLoad = 0;
        // Which specific regs we need to allocate without loading, with writeback
        uint32_t regsToWriteback = 0;
        // How many distinct registers still need a host register
        int regsToAllocateCount = 0;

        for (const int reg : regsWithoutWb) {
            if (!m_gprs[reg].allocated && (regsToLoad & bitFor(reg)) == 0) {
                regsToLoad |= bitFor(reg);
                regsToAllocateCount++;
            }
        }

        for (const int reg : regsWithWb) {
            const bool alreadyCounted = ((regsToWriteback | regsToLoad) & bitFor(reg)) != 0;
            if (!m_gprs[reg].allocated && !alreadyCounted) {
                regsToWriteback |= bitFor(reg);
                regsToAllocateCount++;
            }
        }

        if (regsToAllocateCount == 0) {
            break;  // Everything we need is already cached
        }

        // Flush register cache if we're going to overflow it and restart alloc process.
        // Note that flushRegs() also writes out and forgets constant-propagated registers.
        // NOTE(review): `>=` keeps one slot free even when the request would fit exactly,
        // whereas allocateReg() fills the last slot - confirm whether that is intentional.
        if (m_allocatedRegisters + regsToAllocateCount >= ALLOCATEABLE_REG_COUNT) {
            flushRegs();
            continue;
        }

        // Load the input registers; the allocated check skips duplicates within the list
        for (const int reg : regsWithoutWb) {
            if ((regsToLoad & bitFor(reg)) != 0 && !m_gprs[reg].allocated) {
                reserveReg<LoadingMode::Load>(reg);
            }
        }
        break;
    }

    // Allocate whatever output regs are still missing (no load needed) and specify writeback
    for (const int reg : regsWithWb) {
        if (!m_gprs[reg].allocated) {
            reserveReg<LoadingMode::DoNotLoad>(reg);
        }
        m_gprs[reg].writeback = true;
    }
}

void DynaRecCPU::alloc_rt_rs(uint32_t code) { allocateRegisters<2, 0>({(int)_Rt_, (int)_Rs_}, {}); }

void DynaRecCPU::alloc_rt_wb_rd(uint32_t code) { allocateRegisters<1, 1>({(int)_Rt_}, {(int)_Rd_}); }

void DynaRecCPU::alloc_rs_wb_rd(uint32_t code) { allocateRegisters<1, 1>({(int)_Rs_}, {(int)_Rd_}); }

void DynaRecCPU::alloc_rs_wb_rt(uint32_t code) { allocateRegisters<1, 1>({(int)_Rs_}, {(int)_Rt_}); }

void DynaRecCPU::alloc_rt_rs_wb_rd(uint32_t code) { allocateRegisters<2, 1>({(int)_Rt_, (int)_Rs_}, {(int)_Rd_}); }

#endif  // DYNAREC_X86_64
