boinc/sched/edf_sim.cpp

// This file is part of BOINC.
// http://boinc.berkeley.edu
// Copyright (C) 2023 University of California
//
// BOINC is free software; you can redistribute it and/or modify it
// under the terms of the GNU Lesser General Public License
// as published by the Free Software Foundation,
// either version 3 of the License, or (at your option) any later version.
//
// BOINC is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
// See the GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with BOINC.  If not, see <http://www.gnu.org/licenses/>.

// Logic for doing deadline-based simulations of client workloads
// to decide whether sending new jobs would cause deadline misses.
// This is enabled by the config.xml <workload_sim> flag.
//
// This hasn't been shown to be beneficial.
// It's turned off by default.

#include "boinc_stdio.h"

#include <vector>
#include <algorithm>
#include <cstring>
#include <cstdarg>

#ifdef SIM
const int MAX_CPUS=4096;
#else
#include "sched_config.h"
#include "sched_msgs.h"
#include "sched_send.h"
#endif

#include "edf_sim.h"
#ifdef SIM
extern FILE* logfile;
#endif

using std::vector;

#define SUMMARY 0
    // show candidate decision
#define WORKLOAD  1
    // show workload
#define DETAIL   2
    // show every step of simulation

#define TIME_SCALE 1
//#define TIME_SCALE 3600

#ifdef SIM
static void log_msg(int, const char* format, ...) {
#else
static void log_msg(int level, const char* format, ...) {
    switch(level) {
    case SUMMARY:
        if (!config.debug_send) return;
        break;
    case WORKLOAD:
        if (!config.debug_edf_sim_workload) return;
        break;
    case DETAIL:
        if (!config.debug_edf_sim_detail) return;
        break;
    }
#endif
    va_list va;
    va_start(va, format);
#ifdef SIM
    vfprintf(logfile, format, va);
#else
    log_messages.vprintf(MSG_NORMAL, format, va);
#endif
    va_end(va);
}

bool lessthan_deadline(const IP_RESULT& p1, const IP_RESULT& p2) {
    if (p1.computation_deadline < p2.computation_deadline) return true;
    return false;
}

// run an EDF simulation, marking which results will miss
// their deadlines and when
//
void mark_edf_misses (int ncpus, vector<IP_RESULT>& ip_results){
    vector<IP_RESULT>::iterator ipp_it;
    double booked_to[MAX_CPUS];
    int j;

    log_msg(DETAIL, "[edf_detail] mark_edf_misses\n");

    // keeps track of when each cpu is next free
    //
    for (j=0; j<ncpus; j++) {
        booked_to[j] = 0;
    }

    // sort ipp_results in ascending order by deadline
    //
    sort(ip_results.begin(), ip_results.end(), lessthan_deadline);

    // simulate edf execution of all queued results
    //
    for (unsigned int i=0; i<ip_results.size(); i++) {
        IP_RESULT& r = ip_results[i];

        // find the CPU that will be free first
        //
        double lowest_booked_time = booked_to[0];
        int lowest_booked_cpu = 0;
        for (j=1; j<ncpus; j++) {
            if (booked_to[j] < lowest_booked_time) {
                lowest_booked_time = booked_to[j];
                lowest_booked_cpu = j;
            }
        }

        booked_to[lowest_booked_cpu] += r.cpu_time_remaining;
        log_msg(DETAIL, "[edf_detail]   running %s on cpu %d; finishes at %.2f\n",
            r.name, lowest_booked_cpu, booked_to[lowest_booked_cpu]/TIME_SCALE
        );
        if (booked_to[lowest_booked_cpu] > r.computation_deadline) {
            r.misses_deadline = true;
            r.estimated_completion_time = booked_to[lowest_booked_cpu];
            log_msg(DETAIL, "[edf_detail]   %s misses_deadline; est completion %.2f\n",
                r.name, booked_to[lowest_booked_cpu]/TIME_SCALE
            );
        } else {
            r.misses_deadline = false;
            log_msg(DETAIL, "[edf_detail]   %s makes deadline; est completion %.2f\n",
                r.name, booked_to[lowest_booked_cpu]/TIME_SCALE
            );
            // if result doesn't miss its deadline,
            // then the estimated_completion_time is of no use
        }
    }
}

// For each ip_result, compute computation_deadline from report_deadline,
// and determine if the deadline would be missed by simulating EDF
//
void init_ip_results(
    double work_buf_min,
    int ncpus,
    vector<IP_RESULT>& ip_results
){
    unsigned int i;

    log_msg(DETAIL,
        "[edf_detail] init_ip_results; work_buf_min %.2f ncpus %d:\n",
        work_buf_min/TIME_SCALE, ncpus
    );
    for (i=0; i<ip_results.size(); i++) {
        IP_RESULT& r = ip_results[i];
        r.computation_deadline = r.report_deadline - work_buf_min;
        log_msg(DETAIL, "[edf_detail]     %s: deadline %.2f cpu %.2f\n",
            r.name, r.computation_deadline/TIME_SCALE, r.cpu_time_remaining/TIME_SCALE
        );
    }

    // run edf simulation to determine whether any results miss their deadline
    //
    mark_edf_misses(ncpus, ip_results);
}

#if 0
// Sort test_results by computation_deadline.
// For each test result in ascending order of deadline,
// see whether adding it to the work queue would cause deadline misses
// or deadline miss delays.
// If a test result passes these checks, append it to the work queue
// for further result additions.
// Return list of new results that can be sent to the client.
//
// NOTE: should we sort by increasing deadline or by increasing slack time?
//
vector<TEMP_RESULT> REQUEST_HANDLER_WORK_SEND::find_sendable_test_results (
    vector<TEMP_RESULT> test_results,
    int ncpus,
    vector<TEMP_RESULT> ipp_results,
    double cpu_pessimism_factor // = 1 by default
) {
    //test results to send
    vector<TEMP_RESULT> sendable_test_results;
    vector<TEMP_RESULT>::iterator test_results_it;

    sort(test_results.begin(), test_results.end(), lessthan_deadline);

    // see if each test result can be added to the work queue without
    // causing deadline misses or deadline miss delays
    //
    for (test_results_it = test_results.begin();
        test_results_it != test_results.end();
        test_results_it++
    ) {
        bool failed = result_cause_deadline_miss_or_delay(
            (*test_results_it).computation_deadline*cpu_pessimism_factor,
            (*test_results_it).cpu_time_remaining,
            ncpus,
            ipp_results
        );
        if (!failed){
            // add sendable result to work queue, copying by value, so that we
            // can evaluate what happens if we add more new results to the queue
            //
            ipp_results.push_back(*test_results_it);
            sendable_test_results.push_back (*test_results_it);
        }
    }
    return (sendable_test_results);
}
#endif

// Return false if
// 1) the candidate result X would cause another result Y to miss its deadline
//    (which Y would not have otherwise missed)
// 2) X causes another result Y to miss its deadline by more than
//    it otherwise would have, or
// 3) X would miss its deadline
//
bool check_candidate (
    IP_RESULT& candidate,
    int ncpus,
    vector<IP_RESULT> ip_results        // passed by value (copy)
) {
    double booked_to[MAX_CPUS];     // keeps track of when each cpu is free
    int j;

    log_msg(DETAIL, "[edf_detail] check_candidate %s: dl %.2f cpu %.2f\n",
        candidate.name, candidate.computation_deadline/TIME_SCALE,
        candidate.cpu_time_remaining/TIME_SCALE
    );

    for (j=0; j<ncpus; j++) {
        booked_to[j] = 0;
    }
    ip_results.push_back(candidate);

    // sort ip_results in ascending order by deadline
    //
    sort(ip_results.begin(), ip_results.end(), lessthan_deadline);

    // simulate execution of all queued results and test result
    //
    for (unsigned int i=0; i<ip_results.size(); i++) {
        IP_RESULT& r = ip_results[i];

        // find the CPU that will be free first
        //
        double lowest_booked_time = booked_to[0];
        int lowest_booked_cpu = 0;
        for (j=1; j<ncpus; j++) {
            if (booked_to[j] < lowest_booked_time) {
                lowest_booked_time = booked_to[j];
                lowest_booked_cpu = j;
            }
        }
        booked_to[lowest_booked_cpu] += r.cpu_time_remaining;
        log_msg(DETAIL, "[edf_detail]   running %s on cpu %d; deadline %f, finishes at %.2f\n",
            r.name, lowest_booked_cpu, r.computation_deadline, booked_to[lowest_booked_cpu]/TIME_SCALE
        );

        // return false if completion time if > computation_deadline AND
        // result would not have missed deadline to begin with
        //
        if (booked_to[lowest_booked_cpu] > r.computation_deadline
            && !r.misses_deadline
        ) {
            log_msg(SUMMARY,
                "[send]  cand. fails; %s now misses deadline: %.2f > %.2f\n",
                r.name, booked_to[lowest_booked_cpu]/TIME_SCALE,
                r.computation_deadline/TIME_SCALE
            );
            return false;
        }
        // check a late result (i.e., one that would have missed its
        // deadline) // would be made even later
        //
        if (r.misses_deadline
            && booked_to[lowest_booked_cpu] > r.estimated_completion_time
        ){
            log_msg(SUMMARY,
                "[send]  cand. fails; late result %s would be returned even later\n",
                r.name
            );
            return false;
        }
    }
    log_msg(SUMMARY, "[send]  candidate succeeds\n");
    return true;
}

#if 0
int main() {
    vector<IP_RESULT> ip_results;
    double work_buf_min = 0;
    double cpu_pessimism_factor = 1;
    int ncpus = 1;
    bool flag;

    ip_results.push_back(IP_RESULT("R1", 5, 3));
    ip_results.push_back(IP_RESULT("R2", 5, 3));
    init_ip_results(work_buf_min, ncpus, ip_results);

    IP_RESULT c1 = IP_RESULT("C1", 10, 1);
    if (check_candidate(c1, ncpus, ip_results)) {
        printf("adding %s\n", c1.name);
        ip_results.push_back(c1);
    }

    IP_RESULT c2 = IP_RESULT("C2", 7, 2);
    if (check_candidate(c2, ncpus, ip_results)) {
        printf("adding %s\n", c2.name);
        ip_results.push_back(c2);
    }
}
#endif