/*  jobcenter.cpp
 *
 *  Copyright (C) 2010-2012 Andreas von Manteuffel
 *  Copyright (C) 2010-2012 Cedric Studerus
 *
 *  This file is part of the package Reduze 2.
 *  It is distributed under the GNU General Public License version 3
 *  (see the file GPL-3.0.txt or http://www.gnu.org/licenses/gpl-3.0.txt).
 */

#ifdef HAVE_MPI

#include "jobcenter.h"
#include "files.h"
#include "job.h"
#include "functions.h"
#include <cmath>
#include <iomanip>
#include <unistd.h> // usleep
#include "job_setupsectormappings.h"
#include "job_setupsectormappingsalt.h"
#include "job_verifypermutationsymmetries.h"

/* The communication protocol:
 *
 * Customer-JobCenter communications:
 * ----------------------------------
 * These are initiated in one of these 2 ways:
 * - non-employer customers send a request to the JobCenter
 * - employer customers receive a request from the JobCenter
 *
 * Employer-JobCenter communications:
 * ----------------------------------
 * A manager can become an employer if it registers at the JobCenter as such.
 * The JobCenter will then assign workers to the employer. An employer must
 * at some stage unregister as employer again.
 * If an employer accepts a command sent by the JobCenter, it answers with
 * the same tag. Answering with a different tag means denial of the command.
 * If an employer refuses to accept a new worker, it must answer with
 * PerformanceData and indicate it wants no more workers.
 * If an employer wants to unregister, it must answer with the corresponding
 * tag.
 */

using namespace std;

namespace Reduze {

/// Sets up a job center which talks over \a comm and takes its jobs from
/// \a jobqueue. The number of customers is one less than the communicator
/// size.
JobCenter::JobCenter(MPI::Intracomm* comm, JobQueue* jobqueue) :
	// tuning parameters of the worker distribution function
	worker_distribution_halfwidth_efficiency_(0.8),
	worker_distribution_steepness_(10.),
	comm(comm),
	jobqueue_(jobqueue) {
	const int communicator_size = comm->Get_size();
	n_customers = communicator_size - 1;
}

/// The destructor performs no explicit cleanup.
JobCenter::~JobCenter() {}

/// Unnormalized weight for distributing workers as a function of the
/// efficiency \a e: pi/2 - atan(exp(-(e - e0) * c)).
/// The value is pi/4 at e == e0; \a c scales the exponent.
double raw_worker_distribution(double e, double e0, double c) {
	const double damping = std::exp(-(e - e0) * c);
	return M_PI / 2. - std::atan(damping);
}

/// Rounds \a x to the nearest integer by adding 0.5 (for positive x) or
/// subtracting 0.5 (otherwise) and truncating towards zero.
inline int round(double x) {
	if (x > 0.0)
		return static_cast<int>(x + 0.5);
	return static_cast<int>(x - 0.5);
}

/// Counts the processes which could be freely reassigned: all idle customers,
/// all processes still associated with a job that has already ended, and for
/// each running job the processes beyond its manager plus min_workers().
/// The result can be negative if running jobs hold fewer processes than their
/// minimum.
int JobCenter::find_num_free_workers() const {
	LOGXX("Get number of workers which could be freely reassigned");
	int num_free_workers = idle_customers.size();
	LOGXX("  idle_customers: " << idle_customers.size());
	// note: a manager can be a non-employer but still have workers associated
	map<int, ManagerPerformance>::const_iterator it;
	for (it = performance.begin(); it != performance.end(); ++it) {
		map<int, Job*>::const_iterator jit = job.find(it->first);
		if (jit == job.end()) {
			// job ended already: the remaining processes count as free
			// (added, consistent with find_num_distributable_workers())
			num_free_workers += it->second.num_processes;
		} else {
			// the manager itself plus the minimal number of workers stay bound
			int minprocs = 1 + jit->second->min_workers();
			num_free_workers += it->second.num_processes - minprocs;
		}
	}
	LOGXX("  got " << num_free_workers << " free workers");
	return num_free_workers;
}

int JobCenter::find_num_distributable_workers() const {
	LOGXX("Get number of workers which could be reassigned currently");
	int num_free_workers = idle_customers.size();
	LOGXX("  idle_customers: " << idle_customers.size());
	// note: a manager can be a non-employer but still have workers associated
	map<int, ManagerPerformance>::const_iterator it;
	for (it = performance.begin(); it != performance.end(); ++it) {
		const ManagerPerformance& p = it->second;
		map<int, Job*>::const_iterator jit = job.find(it->first);
		if (jit == job.end()) { // job ended already
			num_free_workers += p.num_processes;
		} else {
			int minworkers = jit->second->min_workers();
			if (p.is_employer)
				num_free_workers += p.num_processes - (1 + minworkers);
			else
				num_free_workers += p.num_processes - 1;
		}
	}
	LOGXX("  got " << num_free_workers << " distributable workers");
	return num_free_workers;
}

bool JobCenter::can_start_new_job() const {
	const Job* j = jobqueue_->provide_runnable_job();
	return (j && j->min_workers() + 1 <= find_num_free_workers());
}

/**
 * Recomputes the target number of processes (num_processes_optimal) for all
 * managers listed in \c performance.
 *
 * Steps:
 *  1. managers whose job ended get 0, non-employers with a job get 1,
 *     employers with a job are collected for distribution;
 *  2. each employer gets a raw weight from raw_worker_distribution() based on
 *     its load efficiency;
 *  3. "special" employers (want no more workers, hit max_workers(), or would
 *     exceed the maximal release per step) are assigned directly;
 *  4. the remaining workers are distributed over the other employers
 *     proportionally to their weights;
 *  5. a remaining mismatch is balanced by one worker per employer: surplus is
 *     taken from the least efficient employers first, leftover workers are
 *     given to the most efficient employers first.
 */
void JobCenter::update_worker_distribution() {
	LOGXX("Calculating optimal distribution");
	int nfree = find_num_distributable_workers();

	// extract all employers
	map<int, double> distrib;
	// todo contains employers with job[rank] referring to a valid job
	list<map<int, ManagerPerformance>::iterator> todo;
	map<int, ManagerPerformance>::iterator it;
	for (it = performance.begin(); it != performance.end(); ++it) {
		if (job.find(it->first) == job.end()) // job ended already
			it->second.num_processes_optimal = 0;
		else if (it->second.is_employer)
			todo.push_back(it);
		else
			it->second.num_processes_optimal = 1; // need to update all
	}
	LOGXX("  find raw distribution for " << todo.size() << " employers, "
			<< nfree << " free workers");
	double sum = 0.;
	list<map<int, ManagerPerformance>::iterator>::iterator t;
	for (t = todo.begin(); t != todo.end(); ++t) {
		double e = (*t)->second.load_efficiency;
		double e0 = worker_distribution_halfwidth_efficiency_;
		double c = worker_distribution_steepness_;
		distrib[(*t)->first] = raw_worker_distribution(e, e0, c);
		sum += distrib[(*t)->first];
	}
	LOGXX("  assign special cases with " << nfree << " free workers");
	double norm = (sum > 0. ? (nfree / sum) : 0.);
	sum = 0.; // from here on: sum of weights of the generic employers only
	for (t = todo.begin(); t != todo.end();) {
		int rank = (*t)->first;
		ManagerPerformance& p = (*t)->second;
		int minprocs = 1 + job[rank]->min_workers();
		int optprocs = round(norm * distrib[rank]);
		int totprocs = minprocs + optprocs;
		int maxrelease = jobqueue_->max_workers_release();
		if (!p.wants_more_workers) { // assign no more than current workers
			p.num_processes_optimal = min(p.num_processes, totprocs);
			nfree -= p.num_processes_optimal - minprocs;
			todo.erase(t++);
		} else if (totprocs >= job[rank]->max_workers()) {
			p.num_processes_optimal = 1 + job[rank]->max_workers();
			nfree -= p.num_processes_optimal - minprocs;
			todo.erase(t++);
		} else if (maxrelease > 0 && p.num_processes - totprocs > maxrelease) {
			// never request the release of more than maxrelease workers
			p.num_processes_optimal = p.num_processes - maxrelease;
			nfree -= p.num_processes_optimal - minprocs;
			todo.erase(t++);
		} else {
			sum += distrib[rank];
			++t;
		}
	}
	LOGXX("  assign generic cases with " << nfree << " free workers");
	// generic employers sorted by ascending load efficiency
	multimap<double, map<int, ManagerPerformance>::iterator> todo_sorted;
	norm = (sum > 0. ? (nfree / sum) : 0.);
	for (t = todo.begin(); t != todo.end(); ++t) {
		int rank = (*t)->first;
		ManagerPerformance& p = (*t)->second;
		int optprocs = round(norm * distrib[rank]);
		int minprocs = 1 + job[rank]->min_workers();
		// NOTE(review): this caps at max_workers() whereas the other bounds
		// in this function use 1 + max_workers() - confirm which is intended
		p.num_processes_optimal = min(minprocs + optprocs,
				job[rank]->max_workers());
		nfree -= p.num_processes_optimal - minprocs;
		todo_sorted.insert(make_pair(p.load_efficiency, *t));
	}

	LOGXX("  account for mismatch of " << nfree << " workers");
	// for: roundoff error or no employer available
	// too many workers assigned: take one from the least efficient employers
	multimap<double, map<int, ManagerPerformance>::iterator>::iterator i;
	for (i = todo_sorted.begin(); nfree < 0 && i != todo_sorted.end(); ++i) {
		ManagerPerformance& p = i->second->second;
		if (p.num_processes_optimal > 1
				+ job[i->second->first]->min_workers()) {
			--p.num_processes_optimal;
			++nfree;
		}
	}
	// workers left over: give one to the most efficient employers
	multimap<double, map<int, ManagerPerformance>::iterator>::reverse_iterator
			ir;
	for (ir = todo_sorted.rbegin(); nfree > 0 && ir != todo_sorted.rend(); ++ir) {
		// must use the reverse iterator ir here (not i from the loop above)
		ManagerPerformance& p = ir->second->second;
		if (p.num_processes_optimal < 1
				+ job[ir->second->first]->max_workers()) {
			++p.num_processes_optimal;
			--nfree;
		}
	}
	if (nfree != 0)
		LOGX("  target distribution leaves " << nfree << " workers unassigned");

#ifdef DEBUG
	for (it = performance.begin(); it != performance.end(); ++it)
	if (it->second.is_employer)
	LOGXX("  employer " << it->first << ":"
			<< " \treleff=" << it->second.load_efficiency
			<< ", \tnprocs=" << it->second.num_processes
			<< ", \tnprocsoptimal=" << it->second.num_processes_optimal);
#endif
}

int JobCenter::find_best_employer() {
	// any manager with less than minimum number of workers ?
	map<int, ManagerPerformance>::iterator it;
	for (it = performance.begin(); it != performance.end(); ++it) {
		ManagerPerformance& p = it->second;
		map<int, Job*>::const_iterator jit = job.find(it->first);
		if (jit != job.end() && p.is_employer && p.wants_more_workers && //
				p.num_processes < 1 + jit->second->min_workers())
			return it->first;
	}
	// new job possible ?
	if (can_start_new_job())
		return 0;
	// who needs the worker most based on our target distribution ?
	multimap<int, int> rank_by_delta; // index = current procs - wanted procs
	for (it = performance.begin(); it != performance.end(); ++it) {
		ManagerPerformance& p = it->second;
		map<int, Job*>::const_iterator jit = job.find(it->first);
		if (jit != job.end() && p.is_employer && p.wants_more_workers && //
				p.num_processes < 1 + jit->second->max_workers()) {
			int delta = p.num_processes - p.num_processes_optimal;
			rank_by_delta.insert(make_pair(delta, it->first));
		}
	}
	if (!rank_by_delta.empty())
		return rank_by_delta.begin()->second;
	return -1; // nothing found
}

/// Adds the wall time elapsed since the last restart of the timer to
/// \c walltime and, weighted with the current number of processes, to
/// \c tot_walltime. Restarts the timer.
void JobCenter::ManagerPerformance::update_walltimes() {
	const double elapsed = timer.get_wall_time();
	timer.restart();
	walltime += elapsed;
	tot_walltime += num_processes * elapsed;
}

/**
 * Handles a status message \a answer received from customer \a from.
 *
 * - StatusFinished: receives the CPU time of the customer, updates the
 *   bookkeeping of the corresponding manager, marks the job as done if the
 *   customer was the manager itself, and puts the customer back into the
 *   queue of idle customers.
 * - StatusRegisterAsEmployer: marks the customer as employer which wants
 *   more workers.
 * - anything else aborts.
 */
void JobCenter::process_customer_status(int answer, int from) {
	if (answer == StatusFinished) {
		double tcpu;
		comm->Recv(&tcpu, 1, MPI::DOUBLE, from, TagCustomer);
		LOGXX("Customer " << from << " finished [CPU: " << tcpu << " s]");
		// find the manager of the job the customer was working on
		int manager = -1;
		if (job.find(from) != job.end())
			manager = from;
		else if (employers.find(from) != employers.end())
			manager = employers[from];
		else
			ABORT("No manager nor worker info found for customer " << from);
		ManagerPerformance& p = performance[manager];
		p.tot_cputime += tcpu;
		p.update_walltimes();
		if (from == manager) { // customer is manager
			// guard the average against a vanishing wall time (as in get_info)
			LOG("Completed " << job_string(job[from]) << " ["
					<< fixed << setprecision(0)
					<< "time: " << p.walltime << " s, "
					<< "cpu: " << p.tot_cputime << " s, "
					<< "procs: " << setprecision(1)
					<< (p.walltime > 0 ? p.tot_walltime / p.walltime : 0.)
					<< resetiosflags(ios::fixed)
					<< "]");
			jobqueue_->set_job_status(job[from]->id(), Job::Done);
			int timeout = 30; // wait some seconds for written files to show up
			// NOTE(review): this streams the pointer jobqueue_, not *jobqueue_
			// - confirm whether the address or the queue should be logged
			if (jobqueue_->resolve_dependencies(timeout))
				LOG("Extended " << jobqueue_);
			job.erase(from);
			print_info();
		} else { // customer is worker
			LOGX("  got worker: " << manager << " => " << from);
			// note: employer might have finished already
			employers.erase(from);
		}
		--p.num_processes;
		if (p.num_processes == 0)
			performance.erase(manager); // no more customer works on this job
		idle_customers.push(from);
	} else if (answer == StatusRegisterAsEmployer) {
		LOGX("Registering " << from << " as employer");
		performance[from].is_employer = true;
		performance[from].wants_more_workers = true;
	} else {
		ABORT("Received unknown status " << answer << " from " << from);
	}
}

bool JobCenter::send_employer_command(int cmd, int to) {
	LOGXX("Sending command " << cmd << " to employer " << to);
	comm->Send(&cmd, 1, MPI::INT, to, TagEmployer);
	int answer;
	LOGXX("Receiving response");
	comm->Recv(&answer, 1, MPI::INT, to, TagEmployer);
	LOGXX("Got response " << answer);
	ManagerPerformance& p = performance[to];
	if (answer == StatusUnregisterAsEmployer) {
		LOGX("Unregistering " << to << " as employer");
		p.is_employer = false;
		//update_worker_distribution();
	} else if (answer == CmdPerformanceData) {
		double eff;
		comm->Recv(&performance[to].progress, 1, MPI::DOUBLE, to, TagEmployer);
		comm->Recv(&eff, 1, MPI::DOUBLE, to, TagEmployer);
		if (eff >= 0.) // negative value means no measurement available
			performance[to].load_efficiency = eff;
		comm->Recv(&eff, 1, MPI::DOUBLE, to, TagEmployer);
		if (eff >= 0.) // negative value means no measurement available
			performance[to].worker_efficiency = eff;
		comm->Recv(&p.wants_more_workers, 1, MPI::BOOL, to, TagEmployer);
		if (!p.wants_more_workers) {
			LOGXX("  employer " << to << " wants no more workers");
			//update_worker_distribution();
		}
	} else if (answer == CmdWorkerQuota) {
		// send target number instead of delta, we know workers only with delay
		int nworkers = max(0, p.num_processes_optimal - 1);
		comm->Send(&nworkers, 1, MPI::INT, to, TagEmployer);
		LOGXX("  workers will be adjusted on " << to);
	} else if (answer == CmdAssignWorker) {
		LOGXX("  employer " << to << " accepted a new worker");
	} else if (answer == CmdContinue) {
	} else if (answer == CmdExit) {
	} else {
		ABORT("Unknown employer response " << answer << " from " << to);
	}
	return answer == cmd;
}

bool JobCenter::assign_worker(int employer, int worker) {
	// increase worker quota first if necessary
	ManagerPerformance& p = performance[employer];
	if (p.num_processes >= p.num_processes_optimal) {
		p.num_processes_optimal = p.num_processes + 1;
		if (!send_employer_command(CmdWorkerQuota, employer))
			return false;
	}
	// offer the worker
	if (!send_employer_command(CmdAssignWorker, employer))
		return false;
	assign_job(job[employer], employer, worker); // inform worker
	comm->Send(&worker, 1, MPI::INT, employer, TagEmployer); // inform employer
	employers[worker] = employer;
	LOGX("  assigned worker: " << worker << " => " << employer);
	return true;
}

/**
 * Tells \a customer to perform job \a j under the manager with rank
 * \a manager. The customer receives: the command, the manager rank, the YAML
 * description of the job, and the job id.
 *
 * If the customer is the manager itself, the job is registered as running
 * with fresh performance data; otherwise the process count of the manager
 * is increased.
 */
void JobCenter::assign_job(Job* j, int manager, int customer) {
	ASSERT(j != 0);

	int command = CmdPerformJob;
	comm->Send(&command, 1, MPI::INT, customer, TagCustomer);
	comm->Send(&manager, 1, MPI::INT, customer, TagCustomer);

	// serialized job description (sent without terminating 0)
	YAML::Emitter emitter;
	j->print_with_type(emitter);
	const char* jobstr = emitter.c_str();
	comm->Send(jobstr, emitter.size(), MPI::CHAR, customer, TagCustomer);

	unsigned id_to_send = j->id();
	comm->Send(&id_to_send, 1, MPI::UNSIGNED, customer, TagCustomer);
	jobqueue_->set_job_status(j->id(), Job::Running);

	if (customer != manager) {
		// an additional worker for an already running job
		performance[manager].num_processes++;
		return;
	}

	// the customer manages a newly started job
	LOGX("");
	LOG("Started   " << job_string(j));
	LOGX(jobstr << "\n");
	job[manager] = j;
	performance[manager] = ManagerPerformance();
	print_info();
}

std::string JobCenter::get_info(int manager_rank) {
	stringstream ss;
	Job* j = job[manager_rank];
	if (j == 0) {
		ss << "no job found for " << manager_rank;
	} else {
		ss << job_string(j) << " on " << manager_rank;
		ManagerPerformance& p = performance[manager_rank];
		if (p.is_employer)
			ss << " parallel ["//
					<< "procs: " << p.num_processes << ", "//
					<< "done: " << fixed << setprecision(2) //
					<< p.progress << ", "//
					<< "releff: " << fixed << setprecision(2)//
					<< p.load_efficiency << ", "//
					<< "abseff: " << fixed << setprecision(2)//
					<< p.worker_efficiency << ", ";
		else
			ss << " serial [" << fixed;
		ss << fixed //
				<< "time: " << setprecision(0) << p.walltime << "s, "//
				<< "cpu: " << p.tot_cputime << "s, "//
				<< "<procs>: " << setprecision(1) //
				<< (p.walltime > 0 ? p.tot_walltime / p.walltime : 0.)//
				<< resetiosflags(ios::fixed) << "]";
	}
	return ss.str();
}

/// Returns the description of job \a j as provided by the job queue.
string JobCenter::job_string(const Job* j) const {
	const string description = jobqueue_->job_string(j);
	return description;
}

void JobCenter::print_info() {
	LOGX("Running jobs:");
	map<int, Job*>::const_iterator it;
	for (it = job.begin(); it != job.end(); ++it)
		LOGX("  " << get_info(it->first));
	bool verbose = false;
	if (verbose) {
		string fn = Files::instance()->get_tmp_directory() + "jobqueue.dot";
		string fntmp = fn + ".tmp";
		jobqueue_->print_dot(fntmp);
		rename(fntmp, fn);
	}
}

// In a parallel environment we must make sure we use coherent global data.
// We enforce it for SectorMappings by requiring that any update job comes
// at the beginning of the job list. We further assume here:
// - only SetupSectorMappings and SetupSectorMappingsAlt update sector mappings
// - these jobs run serially only

bool is_updating_sectormappings(const Job* j) {
	return dynamic_cast<const SetupSectorMappings*>(j) != 0 || //
			dynamic_cast<const SetupSectorMappingsAlt*>(j) != 0;
}

//void verify_sectormappings_update_constraints(/*const*/JobQueue* q) {
//	bool found_non_setup_job = false;
//	list<Job*>::const_iterator j;
//	for (j = q->jobs().begin(); j != q->jobs().end(); ++j)
//		if (is_allowed_to_be_before_sectormappings(*j))
//			continue;
//		if (!is_updating_sectormappings(*j)) {
//			found_non_setup_job = true;
//		} else if (found_non_setup_job) {
//			ERROR("Invalid order of jobs in queue:"
//					"\nJobs to setup or update sector mappings must come at"
//					" top of job list.");
//		}
//	LOGX("Verified no sector mappings updates come late in the job list");
//}

void JobCenter::run() {
	LOG("Starting JobCenter");
	for (int i = 1; i <= (int) n_customers; ++i)
		idle_customers.push(i);
	CountdownTimer terminate_timer(jobqueue_->timeout());
	CountdownTimer analysis_timer(jobqueue_->time_interval_analysis());
	MPI::Status status;

	while (jobqueue_->insert_next_joblist()) {
		run_inserted_jobs(terminate_timer, analysis_timer, status);
	}

	if (jobqueue_->is_completed()) {
		LOG("\nCompleted all jobs");
	} else if (jobqueue_->provide_runnable_job() != 0) { // can't process runnable job
		ABORT("Not enough processes available to run all jobs with" //
				<< " given settings:" << "\n" << *jobqueue_ << "\n\n"//
				<< "Note: distributed runs need one additional MPI process for"//
				<< " the job center plus one additional manager process for each"//
				<< " distributed reduction.");
	} else {
		ABORT("Failed to complete all jobs for some unknown reason");
	}

	while (!idle_customers.empty()) {
		int cmd = CmdExit;
		comm->Send(&cmd, 1, MPI::INT, front_pop(idle_customers), TagCustomer);
	}
}

/**
 * Event loop which processes the jobs currently inserted in the job queue.
 *
 * Each iteration
 *  - terminates the run if \a terminate_timer is elapsed (employers are sent
 *    CmdExit before r_abort(99) is called),
 *  - processes one pending status message of a customer, if any,
 *  - tries to find work for the first idle customer,
 *  - rebalances the workers if \a analysis_timer is elapsed,
 *  - sleeps for 0.1 s if nothing happened.
 * The loop is left when no work is found and all customers are idle.
 *
 * \param terminate_timer  countdown for the overall timeout
 * \param analysis_timer   countdown between two load balancing analyses,
 *                         restarted after each analysis
 * \param status           MPI status object used for probing
 */
void JobCenter::run_inserted_jobs(CountdownTimer& terminate_timer,
		CountdownTimer& analysis_timer, MPI::Status& status) {
	//verify_sectormappings_update_constraints(jobqueue_);
	print_info();
	// set when a job updating the sector mappings has been started
	bool is_sectormappings_in_progress = false;
	while (true) {
		bool have_activity = false;
		if (terminate_timer.is_elapsed()) {
			LOG("Timeout limit of " << jobqueue_->timeout() << " s hit");
			LOG("Flushing data to disk before termination");
			map<int, ManagerPerformance>::iterator it;
			for (it = performance.begin(); it != performance.end(); ++it)
				if (it->second.is_employer) {
					send_employer_command(CmdExit, it->first);
					LOG("  employer " << it->first << " flushed its data");
				}
			LOG("Terminating");
			r_abort(99);
		}
		if (comm->Iprobe(MPI::ANY_SOURCE, TagCustomer, status)) {
			// process answer from a non-employer customer
			int answer;
			int from = status.Get_source();
			comm->Recv(&answer, 1, MPI::INT, from, TagCustomer);
			// NOTE(review): any StatusFinished resets the flag, not only the
			// one of the sector mappings job - presumably relies on these
			// jobs running alone; confirm
			if (is_sectormappings_in_progress && answer == StatusFinished) {
				Files::instance()->clear_sectormappings();
				is_sectormappings_in_progress = false;
			}
			process_customer_status(answer, from);
			have_activity = true;
		}
		if (!idle_customers.empty()) {
			LOGXX("Searching work for " << idle_customers.front());
			// assign work to an idle customer
			// employer > 0: rank of employer, 0: start a new job, < 0: nothing
			int employer = find_best_employer();
			LOGXX("  found employer " << employer);
			if (employer > 0) {
				// the customer stays in the idle queue if the employer declines
				int customer = idle_customers.front(); // front_pop(idle_customers);
				if (assign_worker(employer, customer))
					idle_customers.pop();
				have_activity = true;
			} else if (employer == 0) {
				// the idle customer becomes the manager of a new job
				int customer = front_pop(idle_customers);
				Job* j = jobqueue_->provide_runnable_job();
				assign_job(j, customer, customer);
				have_activity = true;
				if (is_updating_sectormappings(j))
					is_sectormappings_in_progress = true;
			} else if (idle_customers.size() == n_customers) {
				// nothing to assign and nobody is working anymore: done
				break;
			}
		}
		if (analysis_timer.is_elapsed()) {
			// update load balancing data
			LOGX("\nAnalyze if workers should be rebalanced");
			map<int, ManagerPerformance>::iterator it;
			for (it = performance.begin(); it != performance.end(); ++it) {
				it->second.update_walltimes();
				if (it->second.is_employer)
					send_employer_command(CmdPerformanceData, it->first);
			}
			update_worker_distribution();
			print_info();
			for (it = performance.begin(); it != performance.end(); ++it) {
				ManagerPerformance& p = it->second;
				if (!p.is_employer)
					continue;
				// n is used for the log message only
				int n = max(0, p.num_processes - p.num_processes_optimal);
				if (n > 0)
					LOGX("  request release of " << n << " workers from " << it->first);
				// send new amount also if 0 (reset old pending amount)
				send_employer_command(CmdWorkerQuota, it->first);
			}
			analysis_timer.restart();
			have_activity = true;
		}
		// avoid busy waiting: sleep for 100000 microseconds
		if (!have_activity)
			usleep(100000);
	}

//	if (jobqueue_->is_completed()) {
//		LOG("\nCompleted all jobs");
//	} else if (jobqueue_->provide_runnable_job() != 0) { // can't process runnable job
//		ABORT("Not enough processes available to run all jobs with given settings:"
//				<< "\n" << *jobqueue_ << "\n\n"
//				<< "Note: distributed runs need one additional MPI process for"
//				<< " the job center plus one additional manager process for each"
//				<< " distributed reduction.");
//	} else {
//		ABORT("Failed to complete all jobs for some unknown reason");
//	}
//
//	while (!idle_customers.empty()) {
//		int cmd = CmdExit;
//		comm->Send(&cmd, 1, MPI::INT, front_pop(idle_customers), TagCustomer);
//	}
}

/// Creates a customer which communicates over \a comm with the job center
/// at rank \a jobcenter_rank.
JobCenterCustomer::JobCenterCustomer(MPI::Intracomm* comm, int jobcenter_rank) :
	comm(comm),
	jobcenter_rank(jobcenter_rank) {
	// nothing else to set up
}

void JobCenterCustomer::run() {
	while (true) {
		LOGXX("Customer in run() loop");
		int cmd;
		comm->Recv(&cmd, 1, MPI::INT, jobcenter_rank, JobCenter::TagCustomer);
		if (cmd == JobCenter::CmdExit) {
			return;
		} else if (cmd == JobCenter::CmdPerformJob) {
			Timer timer;
			MPI::Status status;
			int manager_rank;
			comm->Recv(&manager_rank, 1, MPI::INT, jobcenter_rank,
					JobCenter::TagCustomer);
			comm->Probe(jobcenter_rank, JobCenter::TagCustomer, status);
			size_t rawstr_len = status.Get_count(MPI::CHAR);
			char* rawstr = new char[rawstr_len]; // no 0 at end
			comm->Recv(rawstr, rawstr_len, MPI::CHAR, jobcenter_rank,
					JobCenter::TagCustomer);
			string jobstr = string(rawstr, rawstr_len);
			delete[] rawstr;
			int jobid;
			comm->Recv(&jobid, 1, MPI::UNSIGNED, jobcenter_rank, JobCenter::TagCustomer);
			LOGX("Customer received job(" << jobid << "): " << jobstr);
			stringstream ss;
			ss.str(jobstr);
			YAML::Parser parser(ss);
			YAML::Node node;
			if (!parser.GetNextDocument(node))
				ABORT("Received malformed job description");
			Job* job;
			try {
				string key;
				node.begin().first() >> key;
				YAMLConfigurable* c =
						YAMLFactory::instance().create_object(key);
				job = dynamic_cast<Job*> (c);
				if (!job)
					throw runtime_error(key + " is not job type");
				job->read(node.begin().second());
				job->set_id(jobid);
			} catch (exception& e) {
				ABORT("Job transfer failed (probably buggy job YAML interface)\n"
						<< e.what() << "\n" << jobstr);
			}
			job->set_communication_params(comm, jobcenter_rank, manager_rank);

			LOG("Performing job " << job->to_short_string());
			if (comm->Get_rank() == manager_rank) {
				string jlogfn =
						Files::instance()->get_safe_log_filename_for_job(job);
				rlog2.set_file(jlogfn);
				Timer jobtime;
				LOGX("\nDate: " << time_string());
				LOGX("Run ID: " << RunIdentification::instance()->string()
					 << "\n");
				LOGX(jobstr << "\n");
				if (job->max_workers() == 0)
					job->run_serial();
				else
					job->run_manager(comm, jobcenter_rank);
				LOGX( "\nJob done (total time: " << fixed << setprecision(0)
						<< jobtime.get_wall_time() << resetiosflags(ios::fixed)
						<< " s)\n");
				rlog2.set_null();

			} else {
				job->run_worker(comm, manager_rank);
			}
			delete job;
			cmd = JobCenter::StatusFinished;
			comm->Send(&cmd, 1, MPI::INT, jobcenter_rank,
					JobCenter::TagCustomer);
			double tcpu = timer.get_cpu_time();
			comm->Send(&tcpu, 1, MPI::DOUBLE, jobcenter_rank,
					JobCenter::TagCustomer);
		} else {
			ABORT("Received unknown command " << cmd);
		}
	}
}

}

#endif // HAVE_MPI
