////////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2005 Sonics, Inc.
//
// Confidential and Proprietary Information of Sonics, Inc.
// Use, disclosure, or reproduction is prohibited without
// written permission from Sonics, Inc.
//
// $Id: SlaveTL2.cpp,v 1.1 2007/01/25 21:51:22 halexan Exp $
//
////////////////////////////////////////////////////////////////////////////////

#include <stdio.h>
#include <algorithm>
#include "ScModelCommon.h"
#include "SlaveTL2.h"
#include "ocp_param.h"
#include "MemoryCl.h"
#include "SimControl.h"
#include "ScSemaphore.h"
#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 2
#include "boost/pool/pool.hpp"
#endif


// Number of Responder sub-modules the constructor creates, one per possible
// thread index. A Responder whose index is >= the configured thread count
// returns from sendMethod() without re-arming itself and so stays idle.
#define MAX_THREADS 16

// Debug trace macro, compiled out when NDEBUG is defined.
// When g_debug is set it prints "<sim time>(<delta count>): <msg>" to cout;
// `msg` may itself be a chain of stream insertions (a << b << c).
// The body is wrapped in do { } while ( 0 ) so that `DBG_MSG( x );` is exactly
// one statement and remains safe inside an unbraced if/else.
#ifndef NDEBUG
extern bool g_debug;
#define DBG_MSG( msg )                                                  \
    do {                                                                \
        if ( g_debug )                                                  \
            cout << sc_time_stamp()                                     \
                 << "(" << sc_get_curr_simcontext()->delta_count()      \
                 << "): " << msg << endl;                               \
    } while ( 0 )
#else
#define DBG_MSG( msg ) do { } while ( 0 )
#endif

using namespace OcpIp;

namespace {
// Thread-busy proxy backed by a TL2 slave port: forwards the proxy queries
// to the port it was constructed with.
template <typename Td, typename Ta>
struct MThreadBusyTL2 : public Sonics::ThreadBusyProxy {
    // Port that is queried; held by reference, not owned.
    OCP_TL2_SlavePort<Td , Ta >& m_port;

    MThreadBusyTL2( OCP_TL2_SlavePort<Td , Ta >& port )
        : m_port ( port )
    {
    }

    virtual ~MThreadBusyTL2()
    {
    }

    // Forwards to the port's MThreadBusyEvent().
    virtual sc_event_finder& threadBusyEvent()
    {
        return m_port.MThreadBusyEvent();
    }

    // The port's MThreadBusy value, or 0 when the OCP parameter
    // "mthreadbusy" is not enabled.
    virtual unsigned int getThreadBusy() const
    {
        if ( m_port->GetParamCl()->mthreadbusy )
            return m_port->getMThreadBusy();
        return 0;
    }
};
// Translates the textual memory-initialization setting (the "meminit"
// configuration string) into an InitMode.
// Matching is case-insensitive. A NULL pointer or an unrecognized string
// yields IM_UNKNOWN.
InitMode convertInitMode( const char* mode ) {
    if ( mode == NULL )
        return IM_UNKNOWN; // strcasecmp() must never be handed a null pointer
    if (0 == strcasecmp(mode,"fixed"))        return IM_FIXED;
    else if (0 == strcasecmp(mode,"random"))  return IM_RANDOM;
    else if (0 == strcasecmp(mode,"address")) return IM_ADDRESS;
    else if (0 == strcasecmp(mode,"reqinfo")) return IM_REQINFO;
    return IM_UNKNOWN;
}
// Numeric flavour of convertInitMode: 1, 2, 3 and 4 select IM_FIXED,
// IM_RANDOM, IM_ADDRESS and IM_REQINFO respectively; every other value maps
// to IM_UNKNOWN.
InitMode convertInitMode( int numParam ) {
    if ( numParam == 1 ) return IM_FIXED;
    if ( numParam == 2 ) return IM_RANDOM;
    if ( numParam == 3 ) return IM_ADDRESS;
    if ( numParam == 4 ) return IM_REQINFO;
    return IM_UNKNOWN;
}
// Functor that fills a (wider) data word of type T by replicating a
// narrower word of type Tw; used to echo ReqInfo into read response data.
// The word is placed sizeof(T)/sizeof(Tw) times, each copy shifted up by
// one Tw width.
template <typename T, typename Tw>
struct RepeatWord {
    static const int DataSize = sizeof( T );
    static const int WordSize = sizeof( Tw );
    T operator()( const Tw& word ) {
        const int copies    = DataSize / WordSize;
        const int shiftBits = 8 * WordSize;
        T filled = word;
        int placed = 1;
        while ( placed < copies ) {
            filled = ( filled << shiftBits ) | word;
            ++placed;
        }
        return filled;
    }
};
// Partial specialization for identical types: nothing to replicate, the
// word is returned unchanged.
template <typename T>
struct RepeatWord<T, T> {
    T operator()( const T& word )
    {
        return word;
    }
};

#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 2
// Returns the boost pool for element type T: a function-local static
// boost::pool<> whose chunk size is sizeof( T ).
// The intent was to use it for the heap copies of request data, but nothing
// in this file calls it yet: boost::pool is meant for POD types, while
// object_pool (for non-POD) has no malloc( n ) to allocate arrays.
// NOTE(review): the guard above requires __GNUC_MINOR__ >= 2 for every major
// version, so compilers such as GCC 4.0 or 4.1 skip this code - confirm that
// is intended.
template <typename T>
static boost::pool<>& getPool() {
    static boost::pool<> pool( sizeof( T ) );
    return pool;
}
#endif

// Copies the n elements starting at p into a freshly allocated array and
// returns that array.
//   p - source elements; must not be NULL (asserted)
//   n - number of elements to copy
// The caller owns the returned array and must release it with delete[].
template <typename T>
static T* copyN( const T* p, size_t n ) {
    assert( p != NULL );
    T* pCopy = new T[n]; // getPool<T>(). ???
    // Qualified on purpose: do not depend on a using-directive leaking out
    // of an included header to find the algorithm.
    std::copy( p, p + n, pCopy );
    return pCopy;
}

// Switch for the automatic SReset sequence: set to true by
// SlaveTL2::enableAutoSReset() and checked by SlaveTL2::SResetInitMethod().
// NOTE(review): this lives in the anonymous namespace, so each translation
// unit that compiles this file gets its own copy, shared by every SlaveTL2
// instantiation in that unit - confirm that is the intended scope.
bool g_sResetEnabled = false;
}

// Debug-stream configuration: DBG_MASK aliases TRANSACTOR_DBG_MASK and
// DBG_STREAM is redefined to `message`.
// NOTE(review): neither macro is referenced anywhere else in this file;
// presumably they are consumed by debug macros from an included header -
// confirm before removing.
#define DBG_MASK   TRANSACTOR_DBG_MASK
#undef  DBG_STREAM
#define DBG_STREAM message

// ----------------------------------------------------------------------------
// constructor
//
// Sets every member to its default (latencies of 1, buffer limits of -1,
// memory size of Ta(-1), no memory model yet), creates the thread-busy proxy
// for port "tp" and registers the SystemC processes. Everything that depends
// on the OCP parameters is deferred to end_of_elaboration().
// ----------------------------------------------------------------------------
template< class Td, class Ta >
SlaveTL2<Td,Ta>::SlaveTL2( sc_module_name name ) :
    sc_module( name ),
    tpP("tp"),
    m_pOcpParams(NULL),
    m_responseLatency(1),
    m_postedWriteLatency(1),
    m_sResetAsserted( false ),
    m_activeResponseThread( -1 ),
    m_maxActiveRequests( -1 ),
    m_maxActiveReadData( -1 ),
    m_maxActiveWriteData( -1 ),
//     m_memSizeInBytes( getDmCore()->paramDefined( "mem_2size" ) ? 
//                       Ta(1) << getDmCore()->getIntParam( "mem_2size" ) :
//                       static_cast<Ta>( -1 ) ),
    m_memSizeInBytes( static_cast<Ta>( -1 ) ),
    m_pMemory(NULL),
    m_pThreadBusyAccess( new MThreadBusyTL2<Td, Ta>( tpP ) ),
    m_threadArbiter( "threadbusy_arbiter", m_pThreadBusyAccess )
{
  // Get new request whenever one comes in
  SC_METHOD( getRequestMethod );
  sensitive << tpP.RequestStartEvent();
  dont_initialize();

  // Build responses whenever scheduleProcessing() notifies m_processEvent
  SC_METHOD( prepareResponseMethod );
  sensitive << m_processEvent;
  dont_initialize();

  // Accept the request
  // No static sensitivity and no dont_initialize(): the method runs once at
  // initialization and from then on re-arms itself with next_trigger().
  SC_METHOD( acceptRequestMethod );

  // Send response
  // One Responder per possible thread index (MAX_THREADS of them), created
  // before the configured thread count is known.
  for ( int i=0; i<MAX_THREADS; ++i ) {
      char respName[40]; // ample room for "responder_" plus any int
      sprintf( respName, "responder_%d", i );
      m_responders.push_back( new Responder( respName, *this, i ) );
  }

  // Release the request once the response has been accepted
  SC_METHOD(acceptedResponseMethod);
  sensitive << tpP.ResponseEndEvent();
  dont_initialize();

  // Automatic SReset sequence; must run at initialization
  SC_METHOD(SResetInitMethod);
  // do initialize!
}

// --------------------------------------------------------------------------
// destructor
//
// Releases what this module allocated itself: the Responder sub-modules
// (constructor), the per-thread response semaphores (end_of_elaboration)
// and the memory model.
// NOTE(review): m_pThreadBusyAccess is also created with new in the
// constructor and handed to m_threadArbiter; it is not deleted here on the
// assumption that the arbiter owns it - confirm.
// --------------------------------------------------------------------------
template< class Td, class Ta >
SlaveTL2<Td,Ta>::~SlaveTL2()
{
    for ( typename vector<Responder*>::iterator it = m_responders.begin();
          it != m_responders.end(); ++it ) {
        delete *it;
    }
    // The semaphores are created with new in end_of_elaboration(). The
    // container is simply empty if elaboration never got that far.
    for ( size_t t = 0; t < m_responseSemaphore.size(); ++t ) {
        delete m_responseSemaphore[t];
    }
    delete m_pMemory;
}

// --------------------------------------------------------------------------
//  SystemC Method SlaveTL2::end_of_elaboration()
// --------------------------------------------------------------------------
//
//  Runs once the design has been built and connected. Fetches the OCP
//  parameters and clock period from the port, applies the slave
//  configuration (buffer limits, latencies, memory initialization), creates
//  the memory model, publishes the slave timing on the port and sizes every
//  per-thread data structure.
//
template< class Td, class Ta >
void SlaveTL2<Td,Ta>::end_of_elaboration()
{
    sc_module::end_of_elaboration();

    // OCP parameters and clock period come from the port
    m_pOcpParams = tpP->GetParamCl();
    m_clkPeriod  = tpP->getPeriod();

    // Buffer limits. A configured 0 makes no sense and is taken to mean
    // "no limit", which is stored as -1.
    m_maxActiveRequests = m_config.request_buffers;
    if ( m_maxActiveRequests == 0 )
        m_maxActiveRequests = -1;

    m_maxActiveReadData = m_config.readdata_buffers;
    if ( m_maxActiveReadData == 0 )
        m_maxActiveReadData = -1;

    m_maxActiveWriteData = m_config.writedata_buffers;
    if ( m_maxActiveWriteData == 0 )
        m_maxActiveWriteData = -1;

    // Latencies
    m_sTimes.RqAL        = m_config.reqaccept_latency;
    m_sTimes.DAL         = m_config.dataaccept_latency;
    m_sTimes.RpSndI      = 1; // TL2 specific ?
    m_responseLatency    = m_config.req2resp_latency;
    m_postedWriteLatency = m_config.postedwrite_latency;

    // Memory initialization mode; a fixed fill value must fit in one byte
    InitMode initMode  = convertInitMode( m_config.meminit.c_str() );
    int      fillValue = m_config.meminit_fixeddata;
    if ( initMode == IM_FIXED && fillValue >= 0x100 ) {
        cerr << "WARNING: SlaveTL2 - illegal meminit_fixeddata ("
             << fillValue << ") value; reset to 0."
             << endl;
        fillValue = 0x0;
    }

    // Memory model, addressed in steps of one data word (data_wdth/8 bytes)
    string memoryName = name();
    memoryName += " memory";
    assert( (m_pOcpParams->data_wdth)%8 == 0 );
    Ta bytesPerWord = m_pOcpParams->data_wdth / 8;
    m_pMemory = new MemoryCl<Td,Ta>( memoryName, m_pOcpParams->addr_wdth,
                                     bytesPerWord, initMode, fillValue );
    tpP->putSlaveTiming( m_sTimes );

    // Per-thread state, one slot for each configured thread
    int numThreads = m_pOcpParams->threads;
    m_threadArbiter.setNumThreads( numThreads );
    m_responseCandidate.resize( numThreads );
    m_responseSemaphore.resize( numThreads );
    m_numActiveRequests.resize( numThreads );
    m_numActiveReadData.resize( numThreads );
    m_numActiveWriteData.resize( numThreads );
    m_burstCounter.resize( numThreads );
    const BurstSequence<Ta> incrPrototype( m_pOcpParams->data_wdth,
                                           m_pOcpParams->addr_wdth,
                                           0, OCP_MBURSTSEQ_INCR );
    m_burstSequence.resize( numThreads, incrPrototype );
    for ( int t = 0; t < numThreads; ++t ) {
        // each response slot is permanently tagged with its thread number
        m_responseCandidate[t].second.SThreadID = t;
        m_responseSemaphore[t] = new sc_extension::ScSemaphore();
    }
}

// --------------------------------------------------------------------------
// SystemC Methods
// --------------------------------------------------------------------------
// Prepare a response when timestamp is met
//
// Sensitive to m_processEvent. Walks the request queue in due-time order,
// processes every request that is due and whose thread's response semaphore
// can be taken, and then either hands the response to the thread arbiter
// or, when the request needs no response, releases the request right away.
template< class Td, class Ta >
void SlaveTL2<Td,Ta>::prepareResponseMethod()
{
    assert( m_deletedEntries.empty() );
    m_deletedEntries.clear();
    bool processingDone = false;
    for ( QueueIterator it = m_requestQueue.begin(); it != m_requestQueue.end();
          ++it ) {
        if ( it->first > sc_time_stamp() )
            break; // too early to process, and map is ordered so can exit now

        const Request& request = it->second;
        // with a single thread the request's MThreadID is ignored
        int thread = ( m_pOcpParams->threads > 1 ) ? request.MThreadID : 0;
        // Get the semaphore once response is no longer in process
        if ( m_responseSemaphore[thread]->trywait() == -1 )
            continue;

        // Prepare response
        Response& response = m_responseCandidate[thread].second;
        assert( response.SThreadID == static_cast<unsigned int>( thread ) );
        bool needsResponse = processRequest( request, response );
        processingDone = true;
        if ( needsResponse ) {
            // Remember which queue entry the response belongs to; the
            // semaphore stays taken and the entry stays queued until
            // acceptedResponseMethod() releases both.
            m_responseCandidate[thread].first = it;
            m_threadArbiter.threadReady( thread );
            DBG_MSG( "prepared " << hex << response << " for request "
                     << request << dec );
        } else {
            // free up the entry
            // (the erase itself is deferred until after this loop so the
            // iterator being walked is not erased underneath it)
            DBG_MSG( "processed " << hex << request << dec );
            m_deletedEntries.push_back( it );
            m_responseSemaphore[thread]->post();
        }
    }
    // Release the requests that completed without a response
    for ( typename std::deque<QueueIterator>::iterator it =
              m_deletedEntries.begin(); it != m_deletedEntries.end(); ++it ) {
        Request& request = (**it).second;
        // free the dynamically allocated copies
        // (made with copyN() in getRequestMethod(): data and data-info only
        // for writes, byte enables for any command that carried them)
        if ( OcpIp::isWrite( request.MCmd ) ) {
            assert( request.MDataPtr != NULL );
            delete[] request.MDataPtr;
            if ( request.MDataInfoPtr != NULL )
                delete[] request.MDataInfoPtr;
        }
        if ( request.MByteEnPtr != NULL ) {
            delete[] request.MByteEnPtr;
        }
        releaseRequestEntry( *it );
    }
    m_deletedEntries.clear();

    // Something was processed, so check whether more work can be scheduled
    if ( processingDone )
        scheduleProcessing();
}

//////////////////////////////////////////////////////////////////
// FUNCTION: processRequest
// DESCRIPTION: access memory and build response
//   Writes store request.MDataPtr[i] into the memory model, reads fill
//   response.SDataPtr[i] from it (or from the replicated MReqInfo word
//   when the memory is in IM_REQINFO mode). One access is made per data
//   word, DataLength in total.
// ARGUMENTS: Request and Response by reference
// RETURNS: true if a response was made
//   Reads always get a response. Writes get one only when
//   writeresp_enable is set and the request is either not a
//   single-request burst or is the last of its burst.
//////////////////////////////////////////////////////////////////
template< class Td, class Ta >
bool SlaveTL2<Td,Ta>::processRequest( const Request& request, Response& response )
{
    bool isWrite = OcpIp::isWrite( request.MCmd );
    bool needsResponse = !isWrite ||
        ( m_pOcpParams->writeresp_enable &&
          ( !request.MBurstSingleReq || request.LastOfBurst ) );

    // Per-thread burst tracking, indexed directly by the request's MThreadID
    BurstSequence<Ta>& burstSequence = m_burstSequence[request.MThreadID];
    BurstCounter&      burstCounter  = m_burstCounter[request.MThreadID];
    burstCounter.next( request );
    // new burst, re-initialize address sequence
    if ( burstCounter.count() == request.DataLength )
        burstSequence.init( request.MAddr, request.MBurstSeq,
                            request.MBurstLength, request.MBurstPrecise,
                            request.MBlockHeight, request.MBlockStride );

    response.STagID = request.MTagID;
    Ta address = request.MAddr;
    unsigned int byteEn = request.MByteEn;
    if ( needsResponse ) {
        response.DataLength  = request.DataLength;
        // a single-request write burst is answered with one response word
        if ( isWrite && m_pOcpParams->burstsinglereq && request.MBurstSingleReq )
            response.DataLength = 1;
        if ( !isWrite ) {
            // TODO: Leak! When do we free this?
            response.SDataPtr = new Td[response.DataLength];
        }
        response.LastOfBurst = request.LastOfBurst;
    }

    for( unsigned int i=0; i< request.DataLength; ++i ) {
        // per-word byte enables, when supplied, override the request-wide one
        if ( request.MByteEnPtr != NULL ) {
            byteEn = request.MByteEnPtr[i];
        }
        // for an unknown burst sequence every word uses request.MAddr
        if ( request.MBurstSeq != OCP_MBURSTSEQ_UNKN ) {
            address = burstSequence.next();
        }
        // addresses at or beyond the memory size wrap by one memory size
        if ( address >= m_memSizeInBytes) {
            address -= m_memSizeInBytes;
        }
        if ( isWrite ) {
            assert( request.MDataPtr != NULL );
            m_pMemory->write( address, request.MDataPtr[i], byteEn );
        } else {
            assert( response.SDataPtr != NULL );
            if ( m_pMemory->m_InitMode != IM_REQINFO ) {
                m_pMemory->read( address, response.SDataPtr[i], byteEn );
            } else
                // IM_REQINFO mode: echo the ReqInfo word instead of memory
                response.SDataPtr[i] =
                    RepeatWord<Td, unsigned int>()( request.MReqInfo );
        }
    }
    if ( needsResponse )
        response.SResp = OCP_SRESP_DVA;
    return needsResponse;
}

// Response was accepted, free up the queue
//
// Sensitive to the port's ResponseEndEvent. For the thread whose response
// was on the channel (m_activeResponseThread) this frees the slave's private
// copies of the request payload, releases the request's queue entry, posts
// the thread's response semaphore and looks for more work to schedule.
template< class Td, class Ta >
void SlaveTL2<Td,Ta>::acceptedResponseMethod()
{
    DBG_MSG( "response on thread " << m_activeResponseThread << " accepted" );
    assert( m_activeResponseThread >= 0 );
    if ( m_activeResponseThread < 0 )
        return;

    QueueIterator entry = m_responseCandidate[m_activeResponseThread].first;

    // The payload arrays were duplicated with copyN() in getRequestMethod().
    // prepareResponseMethod() frees them only for requests that need no
    // response, so those that did get a response are freed here. Data and
    // data-info are copied for writes only; byte enables for any command
    // that carried them. delete[] on a null pointer is a no-op.
    Request& request = entry->second;
    if ( OcpIp::isWrite( request.MCmd ) ) {
        delete[] request.MDataPtr;
        delete[] request.MDataInfoPtr;
    }
    delete[] request.MByteEnPtr;

    // free a slot in the queue
    releaseRequestEntry( entry );
    m_responseSemaphore[m_activeResponseThread]->post();
    m_activeResponseThread = -1;
    scheduleProcessing();
}

// Turns on the automatic SReset sequence performed by SResetInitMethod().
// The flag is the file-level g_sResetEnabled, not a per-instance member.
template< class Td, class Ta >
void
SlaveTL2<Td,Ta>::enableAutoSReset()
{
    g_sResetEnabled = true;
}

// Drives the automatic SReset pulse. Registered without dont_initialize(),
// so it runs at initialization. It does nothing unless enableAutoSReset()
// was called and the OCP parameter "sreset" is set. The first activation
// asserts SReset and re-arms itself 16.1 clock periods later; the next
// activation deasserts it.
template< class Td, class Ta >
void SlaveTL2<Td,Ta>::SResetInitMethod()
{
    if ( !g_sResetEnabled || !m_pOcpParams->sreset )
        return;

    if ( m_sResetAsserted ) {
        // second pass: end of the reset pulse
        tpP->SResetDeassert();
        return;
    }

    // first pass: start the reset pulse and come back later to end it
    tpP->SResetAssert();
    m_sResetAsserted = true;
    next_trigger( 16.1 * m_clkPeriod );
}

// Sensitive to the channel's RequestStartEvent, i.e. runs when a new
// request is available on the channel. Latches the request into
// m_currentRequest, takes private copies of its payload arrays, updates
// SThreadBusy if that parameter is enabled, and schedules
// acceptRequestMethod() through m_acceptRequestEvent.
template< class Td, class Ta >
void SlaveTL2<Td,Ta>::getRequestMethod()
{
    // The triggering event implies that a request is waiting
    bool valid = tpP->getOCPRequest( m_currentRequest );
    assert( valid );
    int thread = m_currentRequest.MThreadID;

    DBG_MSG( "received " << hex << m_currentRequest << dec );

    // The request is queued for later processing, so the arrays it points
    // to must be duplicated. Data and data-info are copied for writes only.
    const bool writeCmd = OcpIp::isWrite( m_currentRequest.MCmd );
    if ( writeCmd ) {
        m_currentRequest.MDataPtr = copyN( m_currentRequest.MDataPtr,
                                           m_currentRequest.DataLength );
    }
    if ( writeCmd && m_currentRequest.MDataInfoPtr != NULL ) {
        m_currentRequest.MDataInfoPtr = copyN( m_currentRequest.MDataInfoPtr,
                                               m_currentRequest.DataLength );
    }
    if ( m_currentRequest.MByteEnPtr != NULL ) {
        m_currentRequest.MByteEnPtr = copyN( m_currentRequest.MByteEnPtr,
                                             m_currentRequest.DataLength );
    }

    if ( m_pOcpParams->sthreadbusy ) {
        // Busy must be decided now: would accepting this request fill us up?
        const bool wouldBeBusy = !canAcceptRequest( m_currentRequest );
        tpP->putSThreadBusyBit( wouldBeBusy, thread );
    }

    // Accept after the TL2 request duration, except for single-word
    // requests, which use the slave's request-accept latency instead.
    unsigned int acceptDelay = tpP->getTL2ReqDuration();
    if ( m_currentRequest.DataLength == 1 ) {
        tpP->getSlaveTiming( m_sTimes );
        acceptDelay = m_sTimes.RqAL;
    }
    m_acceptRequestEvent.notify( acceptDelay * m_clkPeriod );
}

// Arms m_processEvent for the earliest queued request that can make
// progress, i.e. the first entry (in due-time order) whose thread's response
// semaphore has a value greater than 0. The event fires at the request's
// due time, or with a zero delay if that time has already passed. Does
// nothing when the queue is empty or every queued thread is in progress.
template< class Td, class Ta >
void SlaveTL2<Td,Ta>::scheduleProcessing()
{
    // entries in the map are sorted by time
    QueueIterator candidate = m_requestQueue.begin();
    while ( candidate != m_requestQueue.end() ) {
        int thread = candidate->second.MThreadID;
        if ( m_responseSemaphore[thread]->get_value() > 0 )
            break; // this thread is free
        ++candidate; // in progress, look at the next request
    }
    if ( candidate == m_requestQueue.end() )
        return;

    const sc_time& dueTime = candidate->first;
    sc_time delay = SC_ZERO_TIME;
    if ( dueTime > sc_time_stamp() )
        delay = dueTime - sc_time_stamp();

    // replace any notification that is still pending
    m_processEvent.cancel();
    m_processEvent.notify( delay );
    DBG_MSG( "scheduled processing of " << hex  << candidate->second
             << " @" << dec << dueTime );
}

//////////////////////////////////////////////////////////////////
// FUNCTION: SlaveTL2::updateActiveCounts
// DESCRIPTION: Book-keeping for a request that has just been accepted:
// one more active request on its thread, plus DataLength more active
// write-data or read-data words depending on the command
// ARGUMENTS: Request
// RETURNS: Nothing
//////////////////////////////////////////////////////////////////
template< class Td, class Ta >
void
SlaveTL2<Td,Ta>::updateActiveCounts( const Request& request )
{
    const unsigned int thread = request.MThreadID;

    ++m_numActiveRequests[thread];

    if ( !OcpIp::isWrite( request.MCmd ) ) {
        m_numActiveReadData[thread] += request.DataLength;
    } else {
        m_numActiveWriteData[thread] += request.DataLength;
    }

    DBG_MSG( "thread=" << thread << " m_numActiveWriteData=" << m_numActiveWriteData[thread] );
    DBG_MSG( "thread=" << thread << " m_numActiveReadData=" << m_numActiveReadData[thread] );
}

//////////////////////////////////////////////////////////////////
// FUNCTION: SlaveTL2::releaseRequestEntry
// DESCRIPTION: called when the slave is done processing a request.
// Undoes the book-keeping of updateActiveCounts() and erases the
// entry from the request queue. It does not free the payload arrays
// the request points to; that is left to the caller.
// ARGUMENTS: QueueIterator (invalid after the call)
// RETURNS: Nothing
// SIDE EFFECTS: may unset threadbusy, decrements active count
//////////////////////////////////////////////////////////////////
template< class Td, class Ta >
void
SlaveTL2<Td,Ta>::releaseRequestEntry( QueueIterator it )
{
    const Request& request = (*it).second;
    unsigned int thread = request.MThreadID;

    // The underflow checks are done *before* each decrement so that they
    // also catch a bad release when the counters are unsigned (a check for
    // ">= 0" after the decrement could then never fail).
    assert( m_numActiveRequests[thread] > 0 );
    --( m_numActiveRequests[thread] );

    if ( OcpIp::isWrite( request.MCmd ) ) {
        assert( m_numActiveWriteData[thread] >= request.DataLength );
        m_numActiveWriteData[thread] -= request.DataLength;
    } else {
        assert( m_numActiveReadData[thread] >= request.DataLength );
        m_numActiveReadData[thread] -= request.DataLength;
    }

    // a buffer has just been freed, so the thread is no longer busy
    if ( m_pOcpParams->sthreadbusy ) {
        tpP->putSThreadBusyBit( false, thread );
    }
    // `request` dangles from here on; only `thread` is used below
    m_requestQueue.erase( it );

    DBG_MSG( "thread=" << thread << " m_numActiveWriteData=" << m_numActiveWriteData[thread] );
    DBG_MSG( "thread=" << thread << " m_numActiveReadData=" << m_numActiveReadData[thread] );
}

// Accept the Request
//
// Runs once at initialization and afterwards whenever m_acceptRequestEvent
// fires (notified by getRequestMethod()). If the buffers allow it, the
// current request is accepted on the port, counted, and queued with its due
// time; otherwise the method retries when the thread's response semaphore
// is freed.
template< class Td, class Ta >
void
SlaveTL2<Td,Ta>::acceptRequestMethod()
{   
    // the idle check should only handle the initialize call 
    next_trigger( m_acceptRequestEvent );
    if ( m_currentRequest.MCmd == OCP_MCMD_IDLE )
        return;
    
    unsigned int& thread = m_currentRequest.MThreadID;

    if ( canAcceptRequest( m_currentRequest.MCmd, thread ) ) {
        DBG_MSG( "accepted request" );
        tpP->acceptRequest();
        updateActiveCounts( m_currentRequest );
        // Compute latency and queue up request
        int latency = m_responseLatency;    
        if ( OcpIp::isWrite( m_currentRequest.MCmd ) ) {
            // posted writes (WR with write responses disabled) use their
            // own latency, unless that latency is negative
            if ( m_currentRequest.MCmd == OCP_MCMD_WR &&
                 !m_pOcpParams->writeresp_enable && m_postedWriteLatency >= 0 )
                latency = m_postedWriteLatency;
            // with datahandshake, add the master's RqDL timing value
            if ( m_pOcpParams->datahandshake ) {
                tpP->getMasterTiming(m_mTimes);        
                latency += m_mTimes.RqDL;
            }
        }
        // the queue is keyed by due time; the request struct is copied in
        sc_time dueTime = sc_time_stamp() + ( m_clkPeriod * latency );
        m_requestQueue.insert( make_pair( dueTime, m_currentRequest ) );
        scheduleProcessing();
    } else {
        // buffers are full: replace the trigger set above and retry when
        // this thread's response semaphore is freed
        next_trigger( m_responseSemaphore[thread]->free_event() );
    }
}

//////////////////////////////////////////////////////////////////
// FUNCTION: SlaveTL2::canAcceptRequest
// DESCRIPTION: check buffer occupancy for flow control
// ARGUMENTS: 2 versions: with command and thread id, determines
// if we are already busy.
// With a const Request&, determines if we would be busy after
// accepting this request
// RETURNS: yes or no
// NOTE: a limit that is not positive disables the check for that
// resource
//////////////////////////////////////////////////////////////////
template< class Td, class Ta >
bool
SlaveTL2<Td,Ta>::canAcceptRequest( OCPMCmdType cmd, unsigned int thread ) const
{
    const bool writeCmd = OcpIp::isWrite( cmd );

    // request buffers apply to every command
    const bool requestsFull = m_maxActiveRequests > 0 &&
        m_numActiveRequests[thread] >=
        static_cast<unsigned int>( m_maxActiveRequests );
    if ( requestsFull )
        return false;

    // data buffers: only the kind matching the command is checked
    if ( writeCmd ) {
        const bool writeDataFull = m_maxActiveWriteData > 0 &&
            m_numActiveWriteData[thread] >=
            static_cast<unsigned int>( m_maxActiveWriteData );
        return !writeDataFull;
    }

    const bool readDataFull = m_maxActiveReadData > 0 &&
        m_numActiveReadData[thread] >=
        static_cast<unsigned int>( m_maxActiveReadData );
    return !readDataFull;
}

// Projected variant of canAcceptRequest: answers whether the slave would
// still be below its limits *after* taking this request. The request itself
// and its DataLength data words are added to the current counts before they
// are compared with the limits. A limit that is not positive disables the
// check for that resource.
template< class Td, class Ta >
bool
SlaveTL2<Td,Ta>::canAcceptRequest( const Request& request ) const
{
    const bool writeCmd       = OcpIp::isWrite( request.MCmd );
    const unsigned int thread = request.MThreadID;

    // request buffers apply to every command
    const unsigned int projectedRequests = m_numActiveRequests[thread] + 1;
    if ( m_maxActiveRequests > 0 && projectedRequests >=
         static_cast<unsigned int>( m_maxActiveRequests ) )
        return false;

    // data buffers: only the kind matching the command is projected
    if ( writeCmd ) {
        const unsigned int projectedWriteData =
            m_numActiveWriteData[thread] + request.DataLength;
        const bool writeDataFull = m_maxActiveWriteData > 0 &&
            projectedWriteData >=
            static_cast<unsigned int>( m_maxActiveWriteData );
        return !writeDataFull;
    }

    const unsigned int projectedReadData =
        m_numActiveReadData[thread] + request.DataLength;
    const bool readDataFull = m_maxActiveReadData > 0 &&
        projectedReadData >= static_cast<unsigned int>( m_maxActiveReadData );
    return !readDataFull;
}

// Responder: sub-module that sends the responses of one thread.
//   name   - SystemC module name
//   slave  - owning SlaveTL2, kept by reference
//   thread - index of the thread this responder serves
// sendMethod is registered without static sensitivity, so it runs once at
// initialization and afterwards only when it re-arms itself with
// next_trigger().
template <typename Td, typename Ta>
SlaveTL2<Td,Ta>::Responder::Responder( sc_module_name name,
                                       SlaveTL2<Td,Ta>& slave,
                                       int thread ) :
    sc_module( name ),
    m_slave  ( slave ),
    m_thread( thread )
{
    SC_METHOD( sendMethod );
}

// send a response
//
// Puts this thread's prepared response on the port. The method normally
// waits for the arbiter's threadWinner event of its thread. If another
// thread's response is still on the channel, it instead waits for that
// thread's response semaphore to be freed and tries again.
template< class Td, class Ta >
void SlaveTL2<Td,Ta>::Responder::sendMethod()
{
    // Responders beyond the configured thread count are not needed: leave
    // without next_trigger(), so this process never wakes up again.
    if ( m_thread >= m_slave.m_pOcpParams->threads )
        return;

    // default wake-up: the arbiter's threadWinner event for this thread
    next_trigger( m_slave.m_threadArbiter.threadWinner( m_thread ) );

    Response& pending = m_slave.m_responseCandidate[m_thread].second;
    if ( pending.SResp == OCP_SRESP_NULL )
        return; // nothing prepared yet

    const int busyThread = m_slave.m_activeResponseThread;
    if ( busyThread != -1 ) {
        // the channel is occupied by another response; retry once that
        // thread's semaphore is freed (replaces the trigger set above)
        DBG_MSG( "Contention! could not send " << hex << pending << dec );
        next_trigger( m_slave.m_responseSemaphore[busyThread]
                      ->free_event() );
        return;
    }

    bool sent = m_slave.tpP->sendOCPResponse( pending );
    assert( sent );
    DBG_MSG( "sent " << hex << pending << dec );
    m_slave.m_activeResponseThread = m_thread;
}

// Drop the helper macros defined at the top of this file so they do not
// leak into whatever includes or follows it.
#undef MAX_THREADS
#undef DBG_MSG

