3 #define TRACEMF_USE_VERBATIM 1
4 #include "otsdaq/ARTDAQSupervisor/ARTDAQSupervisor.hh"
6 #include "artdaq-core/Utilities/configureMessageFacility.hh"
7 #include "artdaq/BuildInfo/GetPackageBuildInfo.hh"
8 #include "artdaq/DAQdata/Globals.hh"
9 #include "artdaq/ExternalComms/MakeCommanderPlugin.hh"
10 #include "cetlib_except/exception.h"
11 #include "fhiclcpp/make_ParameterSet.h"
12 #include "otsdaq/ARTDAQSupervisor/ARTDAQSupervisorTRACEController.h"
14 #include "artdaq-core/Utilities/ExceptionHandler.hh"
16 #include <boost/exception/all.hpp>
17 #include <boost/filesystem.hpp>
// Maximum number of trailing characters of captured DAQInterface output that gets
// appended to an error message (used by the "... last N characters" reports below).
22 #define OUT_ON_ERR_SIZE 1000
// Name of the configuration directory that is created under
// ARTDAQTableBase::ARTDAQ_FCL_PATH and whose name is handed to do_config.
28 #define FAKE_CONFIG_NAME "ots_config"
// Expands to ARTDAQ_BASE_PORT + partition_ * ARTDAQ_PORTS_PER_PARTITION, with both
// values read from the environment; it references partition_, so it can only be used
// where that member is in scope.
// NOTE(review): the expansion is not parenthesized as a whole, so using it inside a
// larger arithmetic expression may not group as intended.
29 #define DAQINTERFACE_PORT \
30 std::atoi(__ENV__("ARTDAQ_BASE_PORT")) + \
31 (partition_ * std::atoi(__ENV__("ARTDAQ_PORTS_PER_PARTITION")))
// Signal dispositions that were in place before this file installed its own handler,
// keyed by signal number. Written when the handlers are installed and read by
// signal_handler() to put the previous disposition back.
34 static std::unordered_map<int, struct sigaction> old_actions =
35 std::unordered_map<int, struct sigaction>();
// Set to true by the handler-installation code once it has run.
36 static bool sighandler_init =
false;
// Handler installed for the signals set up further down in this file.
// It reports the caught signal through TRACE_STREAMER, unblocks signals, restores a
// previously saved disposition and then re-sends a signal to this process.
// @param signum  number of the signal that was caught
// NOTE(review): several lines of this function (its braces, the declaration of `set`
// and the condition choosing between the two restore paths at the end) are not
// visible in this excerpt; the comments describe the visible statements only.
37 static void signal_handler(
int signum)
// The TRACE_STREAMER call syntax depends on TRACE_REVNUM; both spellings are kept.
40 #if TRACE_REVNUM < 1459
41 TRACE_STREAMER(TLVL_ERROR, &(
"ARTDAQsupervisor")[0], 0, 0, 0)
43 TRACE_STREAMER(TLVL_ERROR, TLOG2(
"ARTDAQsupervisor", 0), 0)
45 <<
"A signal of type " << signum
46 <<
" was caught by ARTDAQSupervisor. Shutting down DAQInterface, "
47 "then proceeding with default handlers!";
// The first call passes no new mask, so it only copies the current signal mask into
// `set`; the second call then unblocks every signal contained in that mask.
53 pthread_sigmask(SIG_UNBLOCK, NULL, &set);
54 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
56 #if TRACE_REVNUM < 1459
57 TRACE_STREAMER(TLVL_ERROR, &(
"ARTDAQsupervisor")[0], 0, 0, 0)
59 TRACE_STREAMER(TLVL_ERROR, TLOG2(
"ARTDAQsupervisor", 0), 0)
61 <<
"Calling default signal handler";
// Put back the disposition saved for this signal and send the same signal to this
// process again, so it is handled by that previous disposition.
64 sigaction(signum, &old_actions[signum], NULL);
65 kill(getpid(), signum);
// Alternative path: restore the saved SIGINT disposition and send SIGINT instead.
// NOTE(review): old_actions[...] inserts a default entry when the key is missing;
// confirm every signal reaching here was recorded at installation time.
71 sigaction(SIGINT, &old_actions[SIGINT], NULL);
72 kill(getpid(), SIGINT);
// NOTE(review): the header of the enclosing function is not visible in this excerpt.
// The constructor calls init_sighandler(this), which is presumably this routine —
// confirm against the full file.
// Visible behavior: under a function-local mutex, mark the handlers as initialized and
// install signal_handler for each entry of `signals` that is not currently ignored,
// saving the previous disposition in old_actions.
78 static std::mutex sighandler_mutex;
79 std::unique_lock<std::mutex> lk(sighandler_mutex);
83 sighandler_init =
true;
85 std::vector<int> signals = {
96 for(
auto signal : signals)
98 struct sigaction old_action;
// With a NULL new action this only queries the current disposition.
99 sigaction(signal, NULL, &old_action);
// Signals that are currently ignored are left untouched.
103 if(old_action.sa_handler != SIG_IGN)
// NOTE(review): `action` is not value-initialized here; whether sa_flags is assigned
// before the sigaction() call cannot be seen in this excerpt.
105 struct sigaction action;
106 action.sa_handler = signal_handler;
107 sigemptyset(&action.sa_mask);
// While the handler runs, block every signal from the list.
108 for(
auto sigblk : signals)
110 sigaddset(&action.sa_mask, sigblk);
// Install the new handler and remember what was there before.
116 sigaction(signal, &action, NULL);
117 old_actions[signal] = old_action;
// Constructor.
// Initializes the DAQInterface bookkeeping members, installs the signal handlers,
// exports DAQInterface environment variables, and (re)writes the DAQInterface settings
// file whose path is taken from the DAQINTERFACE_SETTINGS environment variable. Finally
// it replaces the TRACE controller of CorePropertySupervisorBase.
// @param stub  xdaq application stub
// NOTE(review): some lines of the constructor (base-class initializer, line endings of
// several stream statements, braces) are not visible in this excerpt.
124 ARTDAQSupervisor::ARTDAQSupervisor(xdaq::ApplicationStub* stub)
126 , daqinterface_ptr_(NULL)
127 , partition_(getSupervisorProperty(
"partition", 0))
128 , daqinterface_state_(
"notrunning")
129 , runner_thread_(nullptr)
131 __SUP_COUT__ <<
"Constructor." << __E__;
134 init_sighandler(
this);
// The settings file is truncated and rewritten on every construction. Each entry below
// is "<key>: <value>", where the value is a supervisor property with the default shown.
141 auto settings_file = __ENV__(
"DAQINTERFACE_SETTINGS");
142 std::ofstream o(settings_file, std::ios::trunc);
// Export partition number and log file name to the environment (overwriting any
// existing values, since the third setenv argument is 1).
144 setenv(
"DAQINTERFACE_PARTITION_NUMBER", std::to_string(partition_).c_str(), 1);
// NOTE(review): the directory component is spelled "DAQInteface" — confirm whether this
// spelling is intentional (it must match the directory that actually exists).
145 auto logfileName = std::string(__ENV__(
"OTSDAQ_LOG_DIR")) +
146 "/DAQInteface/DAQInterface_partition" +
147 std::to_string(partition_) +
".log";
148 setenv(
"DAQINTERFACE_LOGFILE", logfileName.c_str(), 1);
150 o <<
"log_directory: "
151 << getSupervisorProperty(
"log_directory", std::string(__ENV__(
"OTSDAQ_LOG_DIR")))
155 const std::string record_directory = getSupervisorProperty(
156 "record_directory", ARTDAQTableBase::ARTDAQ_FCL_PATH +
"/run_records/");
// NOTE(review): the mkdir() result is not checked, and mkdir() creates only the last
// path component.
157 mkdir(record_directory.c_str(), 0755);
158 o <<
"record_directory: " << record_directory << std::endl;
161 o <<
"package_hashes_to_save: "
162 << getSupervisorProperty(
"package_hashes_to_save",
"[artdaq]") << std::endl;
164 o <<
"spack_root_for_bash_scripts: "
165 << getSupervisorProperty(
"spack_root_for_bash_scripts",
166 std::string(__ENV__(
"SPACK_ROOT")))
// Per-process-type timeouts; note that the settings keys contain a space while the
// property names use an underscore.
168 o <<
"boardreader timeout: " << getSupervisorProperty(
"boardreader_timeout", 30)
170 o <<
"eventbuilder timeout: " << getSupervisorProperty(
"eventbuilder_timeout", 30)
172 o <<
"datalogger timeout: " << getSupervisorProperty(
"datalogger_timeout", 30)
174 o <<
"dispatcher timeout: " << getSupervisorProperty(
"dispatcher_timeout", 30)
// max_fragment_size_bytes is only written when advanced_memory_usage is false.
177 if(!getSupervisorProperty(
"advanced_memory_usage",
false))
179 o <<
"max_fragment_size_bytes: "
180 << getSupervisorProperty(
"max_fragment_size_bytes", 1048576) << std::endl;
182 o <<
"transfer_plugin_to_use: "
183 << getSupervisorProperty(
"transfer_plugin_to_use",
"TCPSocket") << std::endl;
// The three transfer_plugin_from_* entries are optional: each is written only when the
// corresponding property is a non-empty string.
184 if(getSupervisorProperty(
"transfer_plugin_from_brs",
"") !=
"")
186 o <<
"transfer_plugin_from_brs: "
187 << getSupervisorProperty(
"transfer_plugin_from_brs",
"") << std::endl;
189 if(getSupervisorProperty(
"transfer_plugin_from_ebs",
"") !=
"")
191 o <<
"transfer_plugin_from_ebs: "
192 << getSupervisorProperty(
"transfer_plugin_from_ebs",
"") << std::endl;
194 if(getSupervisorProperty(
"transfer_plugin_from_dls",
"") !=
"")
196 o <<
"transfer_plugin_from_dls: "
197 << getSupervisorProperty(
"transfer_plugin_from_dls",
"") << std::endl;
// std::boolalpha makes the boolean properties appear as "true"/"false".
199 o <<
"all_events_to_all_dispatchers: " << std::boolalpha
200 << getSupervisorProperty(
"all_events_to_all_dispatchers",
true) << std::endl;
201 o <<
"data_directory_override: "
202 << getSupervisorProperty(
"data_directory_override",
203 std::string(__ENV__(
"ARTDAQ_OUTPUT_DIR")))
205 o <<
"max_configurations_to_list: "
206 << getSupervisorProperty(
"max_configurations_to_list", 10) << std::endl;
207 o <<
"disable_unique_rootfile_labels: "
208 << getSupervisorProperty(
"disable_unique_rootfile_labels",
false) << std::endl;
209 o <<
"use_messageviewer: " << std::boolalpha
210 << getSupervisorProperty(
"use_messageviewer",
false) << std::endl;
211 o <<
"use_messagefacility: " << std::boolalpha
212 << getSupervisorProperty(
"use_messagefacility",
true) << std::endl;
213 o <<
"fake_messagefacility: " << std::boolalpha
214 << getSupervisorProperty(
"fake_messagefacility",
false) << std::endl;
215 o <<
"kill_existing_processes: " << std::boolalpha
216 << getSupervisorProperty(
"kill_existing_processes",
true) << std::endl;
217 o <<
"advanced_memory_usage: " << std::boolalpha
218 << getSupervisorProperty(
"advanced_memory_usage",
false) << std::endl;
219 o <<
"strict_fragment_id_mode: " << std::boolalpha
220 << getSupervisorProperty(
"strict_fragment_id_mode",
false) << std::endl;
221 o <<
"disable_private_network_bookkeeping: " << std::boolalpha
222 << getSupervisorProperty(
"disable_private_network_bookkeeping",
false) << std::endl;
223 o <<
"allowed_processors: " << getSupervisorProperty(
"allowed_processors",
"0-255")
// If the base class already owns a TRACE controller, delete it before a new one is
// assigned and pointed back at this supervisor.
230 if(CorePropertySupervisorBase::theTRACEController_)
232 __SUP_COUT__ <<
"Destroying TRACE Controller..." << __E__;
233 delete CorePropertySupervisorBase::
235 CorePropertySupervisorBase::theTRACEController_ =
nullptr;
237 CorePropertySupervisorBase::theTRACEController_ =
240 ->setSupervisorPtr(
this);
242 __SUP_COUT__ <<
"Constructed." << __E__;
// Destructor; logs on entry and on exit.
// NOTE(review): whatever cleanup happens between the two log statements is not visible
// in this excerpt.
246 ARTDAQSupervisor::~ARTDAQSupervisor(
void)
248 __SUP_COUT__ <<
"Destructor." << __E__;
250 __SUP_COUT__ <<
"Destructed." << __E__;
// Tears down the embedded DAQInterface object: if one exists, calls its do_recover
// method, waits for the "stopped" state, and releases the Python reference. Afterwards
// Python's sys.stdout/sys.stderr are pointed back at sys.__stdout__/sys.__stderr__, the
// StringIO capture objects are released, Python threads are joined and garbage is
// collected.
// NOTE(review): several lines (braces, the waiting/refresh inside the loop, the end of
// the embedded Python snippet) are not visible in this excerpt.
254 void ARTDAQSupervisor::destroy(
void)
256 __SUP_COUT__ <<
"Destroying..." << __E__;
258 if(daqinterface_ptr_ != NULL)
260 __SUP_COUT__ <<
"Calling recover transition" << __E__;
// Python calls on the DAQInterface object are made while holding daqinterface_mutex_.
261 std::lock_guard<std::recursive_mutex> lk(daqinterface_mutex_);
// NOTE(review): PyUnicode_FromString returns a new reference and
// PyObject_CallMethodObjArgs returns a new reference as well; no matching release of
// pName or of the call result is visible here — confirm against the full file.
262 PyObject* pName = PyUnicode_FromString(
"do_recover");
263 PyObject_CallMethodObjArgs(
264 daqinterface_ptr_, pName, NULL);
266 __SUP_COUT__ <<
"Making sure that correct state has been reached" << __E__;
// Loop until the cached state string equals "stopped".
// NOTE(review): how daqinterface_state_ is refreshed inside the loop is not visible.
268 while(daqinterface_state_ !=
"stopped")
271 __SUP_COUT__ <<
"State is " << daqinterface_state_
272 <<
", waiting 1s and retrying..." << __E__;
// Drop this object's reference to the DAQInterface instance and forget the pointer.
277 Py_XDECREF(daqinterface_ptr_);
287 daqinterface_ptr_ = NULL;
// NOTE(review): "Flusing" in the log message below looks like a typo for "Flushing";
// left unchanged because it is runtime text.
290 __SUP_COUT__ <<
"Flusing printouts" << __E__;
293 PyRun_SimpleString(R
"(
295 sys.stdout = sys.__stdout__
296 sys.stderr = sys.__stderr__
298 Py_XDECREF(stringIO_out);
299 Py_XDECREF(stringIO_err);
301 __SUP_COUT__ << "Thread and garbage cleanup" << __E__;
304 "import threading; [t.join() for t in threading.enumerate() if t is not "
305 "threading.main_thread()]");
306 PyRun_SimpleString(
"import gc; gc.collect()");
312 __SUP_COUT__ <<
"Destroying TRACE Controller..." << __E__;
317 __SUP_COUT__ <<
"Destroyed." << __E__;
// Sets up the embedded DAQInterface: configures the artdaq message facility, adds the
// directory named by ARTDAQ_DAQINTERFACE_DIR to Python's sys.path, imports the module
// rc.control.daqinterface, redirects Python's sys.stdout/sys.stderr into io.StringIO
// objects (stringIO_out / stringIO_err) and constructs the DAQInterface object, which is
// stored in daqinterface_ptr_.
// NOTE(review): a number of lines (error checks, throw statements, the keyword-argument
// values, braces) are not visible in this excerpt.
321 void ARTDAQSupervisor::init(
void)
325 __SUP_COUT__ <<
"Initializing..." << __E__;
327 std::lock_guard<std::recursive_mutex> lk(daqinterface_mutex_);
330 artdaq::configureMessageFacility(
"ARTDAQSupervisor");
331 __SUP_COUT__ <<
"artdaq MF configured." << __E__;
// The DAQInterface installation directory must be provided through the environment.
334 char* daqinterface_dir = getenv(
"ARTDAQ_DAQINTERFACE_DIR");
335 if(daqinterface_dir == NULL)
337 __SS__ <<
"ARTDAQ_DAQINTERFACE_DIR environment variable not set! This "
338 "means that DAQInterface has not been setup!"
344 __SUP_COUT__ <<
"Initializing Python" << __E__;
347 __SUP_COUT__ <<
"Adding DAQInterface directory to PYTHON_PATH" << __E__;
// PySys_GetObject returns a borrowed reference to sys.path; the directory string is
// appended to it and the temporary string object is released afterwards.
348 PyObject* sysPath = PySys_GetObject((
char*)
"path");
349 PyObject* programName = PyUnicode_FromString(daqinterface_dir);
350 PyList_Append(sysPath, programName);
351 Py_DECREF(programName);
353 __SUP_COUT__ <<
"Creating Module name" << __E__;
354 PyObject* pName = PyUnicode_FromString(
"rc.control.daqinterface");
357 __SUP_COUT__ <<
"Importing module" << __E__;
358 PyObject* pModule = PyImport_Import(pName);
364 __SS__ <<
"Failed to load rc.control.daqinterface" << __E__;
369 __SUP_COUT__ <<
"Loading python module dictionary" << __E__;
// PyModule_GetDict returns a borrowed reference to the module's namespace dictionary.
370 PyObject* pDict = PyModule_GetDict(pModule);
374 __SS__ <<
"Unable to load module dictionary" << __E__;
381 __SUP_COUT__ <<
"Getting DAQInterface object pointer" << __E__;
// Look up the DAQInterface class object in the module dictionary.
// NOTE(review): PyDict_GetItemString returns a borrowed reference, yet di_obj_ptr is
// passed to Py_DECREF further down; unless a matching Py_INCREF exists in lines not
// visible here, that would over-release the object — confirm.
382 PyObject* di_obj_ptr = PyDict_GetItemString(pDict,
"DAQInterface");
384 __SUP_COUT__ <<
"Filling out DAQInterface args struct" << __E__;
// The constructor is called with no positional arguments and six keyword arguments
// (four strings and two ints, per the format string).
385 PyObject* pArgs = PyTuple_New(0);
387 PyObject* kwargs = Py_BuildValue(
"{s:s, s:s, s:i, s:i, s:s, s:s}",
401 __SUP_COUT__ <<
"Calling DAQInterface Object Constructor" << __E__;
// Replace Python's stdout/stderr with in-memory StringIO buffers so that DAQInterface
// output can be read back from C++.
405 PyObject* sys = PyImport_ImportModule(
"sys");
406 PyObject* io = PyImport_ImportModule(
"io");
409 stringIO_out = PyObject_CallMethod(io,
"StringIO", NULL);
410 stringIO_err = PyObject_CallMethod(io,
"StringIO", NULL);
417 PyObject_SetAttrString(sys,
"stdout", stringIO_out);
418 PyObject_SetAttrString(sys,
"stderr", stringIO_err);
421 daqinterface_ptr_ = PyObject_Call(di_obj_ptr, pArgs, kwargs);
// NOTE(review): this calls a method named "does_not_exist" on the sys module; the
// surrounding condition is not visible, so it is unclear whether this is a deliberate
// failing call used to exercise the stderr capture — confirm.
426 PyObject* bad = PyObject_CallMethod(sys,
"does_not_exist", NULL);
// Read back whatever has been written to the captured stderr buffer.
432 PyObject_CallMethod(stringIO_err,
"getvalue", NULL);
434 __COUT__ <<
"Captured stderr:\n"
435 << PyUnicode_AsUTF8(err_text) <<
"\n";
437 __COUT__ <<
"Capture of stderr failed.";
441 Py_DECREF(di_obj_ptr);
472 __SUP_COUT__ <<
"Initialized." << __E__;
// State-machine handler for the Configuring transition (the event argument is unnamed
// and unused in the visible code).
// First pass (iteration index and sub-iteration index both 0): reset the worker-thread
// bookkeeping, load the configuration table group named in the current state-machine
// message, start configuringThread() on a detached thread and request another
// iteration. Later passes: read the thread's error message and progress, fail the
// transition on an error or after more than 600 seconds without a progress change,
// otherwise keep requesting iterations until the work is complete.
// NOTE(review): braces, the try statement, else branches and parts of the throw
// expressions are not visible in this excerpt.
476 void ARTDAQSupervisor::transitionConfiguring(toolbox::Event::Reference )
478 __SUP_COUT__ <<
"transitionConfiguring" << __E__;
// First call of this transition only.
481 if(RunControlStateMachine::getIterationIndex() == 0 &&
482 RunControlStateMachine::getSubIterationIndex() == 0)
484 thread_error_message_ =
"";
485 thread_progress_bar_.resetProgressBar(0);
486 last_thread_progress_update_ = time(0);
// Group name and key are both taken from the current state-machine message.
489 SOAPUtilities::translate(theStateMachine_.getCurrentMessage())
491 .getValue(
"ConfigurationTableGroupName"),
492 TableGroupKey(SOAPUtilities::translate(theStateMachine_.getCurrentMessage())
494 .getValue(
"ConfigurationTableGroupKey")));
496 __SUP_COUT__ <<
"Configuration table group name: " << theGroup.first
497 <<
" key: " << theGroup.second << __E__;
515 ConfigurationManager::LoadGroupType::ALL_TYPES,
// Loading failures are logged, stored as the state machine's error message and
// re-thrown as a toolbox::fsm exception.
518 catch(
const std::runtime_error& e)
520 __SS__ <<
"Error loading table group '" << theGroup.first <<
"("
521 << theGroup.second <<
")! \n"
522 << e.what() << __E__;
523 __SUP_COUT_ERR__ << ss.str();
527 theStateMachine_.setErrorMessage(ss.str());
528 throw toolbox::fsm::exception::Exception(
531 "ARTDAQSupervisor::transitionConfiguring" ,
538 __SS__ <<
"Unknown error loading table group '" << theGroup.first <<
"("
539 << theGroup.second <<
")!" << __E__;
540 __SUP_COUT_ERR__ << ss.str();
544 theStateMachine_.setErrorMessage(ss.str());
545 throw toolbox::fsm::exception::Exception(
548 "ARTDAQSupervisor::transitionConfiguring" ,
// The actual configuration work runs on a detached thread; this handler only polls it.
555 std::thread(&ARTDAQSupervisor::configuringThread,
this).detach();
557 __SUP_COUT__ <<
"Configuring thread started." << __E__;
559 RunControlStateMachine::
560 indicateIterationWork();
// Polling pass: take a copy of the thread's error message under a mutex and read the
// current progress value.
564 std::string errorMessage;
566 std::lock_guard<std::mutex> lock(
568 errorMessage = thread_error_message_;
570 int progress = thread_progress_bar_.
read();
571 __SUP_COUTV__(errorMessage);
572 __SUP_COUTV__(progress);
573 __SUP_COUTV__(thread_progress_bar_.
isComplete());
// Watchdog: no error so far, but the progress value has not changed for over 600 s.
576 if(errorMessage ==
"" &&
577 time(0) - last_thread_progress_update_ > 600)
579 __SUP_SS__ <<
"There has been no update from the configuration thread for "
580 << (time(0) - last_thread_progress_update_)
581 <<
" seconds, assuming something is wrong and giving up! "
582 <<
"Last progress received was " << progress << __E__;
583 errorMessage = ss.str();
586 if(errorMessage !=
"")
588 __SUP_SS__ <<
"Error was caught in configuring thread: " << errorMessage
590 __SUP_COUT_ERR__ <<
"\n" << ss.str();
// NOTE(review): the location string below names CoreSupervisorBase, whereas the other
// throws in this function name ARTDAQSupervisor — confirm which is intended.
592 theStateMachine_.setErrorMessage(ss.str());
593 throw toolbox::fsm::exception::Exception(
596 "CoreSupervisorBase::transitionConfiguring" ,
604 RunControlStateMachine::
605 indicateIterationWork();
// Remember when the progress value last changed; this feeds the watchdog above.
607 if(last_thread_progress_read_ != progress)
609 last_thread_progress_read_ = progress;
610 last_thread_progress_update_ = time(0);
617 __SUP_COUT_INFO__ <<
"Complete configuring transition!" << __E__;
618 __SUP_COUTV__(getProcessInfo_());
// Worker run on a detached thread by transitionConfiguring().
// Visible steps: extract the ARTDAQ process description from the configuration, write
// boot.txt, rebuild the FAKE_CONFIG_NAME directory with one symlinked .fcl per process,
// then drive DAQInterface through setdaqcomps, do_boot (with one recover-and-retry) and
// do_config. Progress is reported through thread_progress_bar_ and
// set_thread_message_(); failures end up in thread_error_message_.
// NOTE(review): braces, try/else keywords, throw statements, result checks and some
// declarations (e.g. theSupervisorNode, res3) are not visible in this excerpt.
626 void ARTDAQSupervisor::configuringThread()
// Resolve this supervisor's UID through its link in the XDAQ application table.
629 std::string uid = theConfigurationManager_
630 ->
getNode(ConfigurationManager::XDAQ_APPLICATION_TABLE_NAME +
631 "/" + CorePropertySupervisorBase::getSupervisorUID() +
632 "/" +
"LinkToSupervisorTable")
635 __COUT__ <<
"Supervisor uid is " << uid <<
", getting supervisor table node" << __E__;
637 const std::string mfSubject_ = supervisorClassNoNamespace_ +
"-" + uid;
641 thread_progress_bar_.
step();
643 set_thread_message_(
"ConfigGen");
// NOTE(review): the default for max_fragment_size_bytes here is 8888, whereas the
// constructor writes the settings file with a default of 1048576 — confirm this
// difference is intended.
645 auto info = ARTDAQTableBase::extractARTDAQInfo(
649 getSupervisorProperty(
"max_fragment_size_bytes", 8888),
650 getSupervisorProperty(
"routing_timeout_ms", 1999),
651 getSupervisorProperty(
"routing_retry_count", 12),
652 &thread_progress_bar_);
// Sanity checks: at least one BoardReader and one EventBuilder are required.
655 if(info.processes.count(ARTDAQTableBase::ARTDAQAppType::BoardReader) == 0)
657 __GEN_SS__ <<
"There must be at least one enabled BoardReader!" << __E__;
661 if(info.processes.count(ARTDAQTableBase::ARTDAQAppType::EventBuilder) == 0)
663 __GEN_SS__ <<
"There must be at least one enabled EventBuilder!" << __E__;
// ---- boot.txt: truncated and rewritten under ARTDAQTableBase::ARTDAQ_FCL_PATH ----
668 thread_progress_bar_.
step();
669 set_thread_message_(
"Writing boot.txt");
671 __GEN_COUT__ <<
"Writing boot.txt" << __E__;
673 int debugLevel = theSupervisorNode.
getNode(
"DAQInterfaceDebugLevel").
getValue<
int>();
674 std::string setupScript = theSupervisorNode.
getNode(
"DAQSetupScript").
getValue();
676 std::ofstream o(ARTDAQTableBase::ARTDAQ_FCL_PATH +
"/boot.txt", std::ios::trunc);
677 o <<
"DAQ setup script: " << setupScript << std::endl;
678 o <<
"debug level: " << debugLevel << std::endl;
// Subsystem entries are only written when more than one subsystem is defined.
681 if(info.subsystems.size() > 1)
683 for(
auto& ss : info.subsystems)
687 o <<
"Subsystem id: " << ss.first << std::endl;
688 if(ss.second.destination != 0)
690 o <<
"Subsystem destination: " << ss.second.destination << std::endl;
692 for(
auto& sss : ss.second.sources)
694 o <<
"Subsystem source: " << sss << std::endl;
// When eventMode is set, fragment mode is explicitly written as False.
696 if(ss.second.eventMode)
698 o <<
"Subsystem fragmentMode: False" << std::endl;
// One section per process type. For every process: host and label are always written,
// the subsystem only when it differs from 1, allowed_processors only when non-empty;
// the label is also recorded in label_to_proc_type_map_.
704 for(
auto& builder : info.processes[ARTDAQTableBase::ARTDAQAppType::EventBuilder])
706 o <<
"EventBuilder host: " << builder.hostname << std::endl;
707 o <<
"EventBuilder label: " << builder.label << std::endl;
708 label_to_proc_type_map_[builder.label] =
"EventBuilder";
709 if(builder.subsystem != 1)
711 o <<
"EventBuilder subsystem: " << builder.subsystem << std::endl;
713 if(builder.allowed_processors !=
"")
715 o <<
"EventBuilder allowed_processors: " << builder.allowed_processors
720 for(
auto& logger : info.processes[ARTDAQTableBase::ARTDAQAppType::DataLogger])
722 o <<
"DataLogger host: " << logger.hostname << std::endl;
723 o <<
"DataLogger label: " << logger.label << std::endl;
724 label_to_proc_type_map_[logger.label] =
"DataLogger";
725 if(logger.subsystem != 1)
727 o <<
"DataLogger subsystem: " << logger.subsystem << std::endl;
729 if(logger.allowed_processors !=
"")
731 o <<
"DataLogger allowed_processors: " << logger.allowed_processors
// Dispatchers additionally get their port written.
736 for(
auto& dispatcher : info.processes[ARTDAQTableBase::ARTDAQAppType::Dispatcher])
738 o <<
"Dispatcher host: " << dispatcher.hostname << std::endl;
739 o <<
"Dispatcher label: " << dispatcher.label << std::endl;
740 o <<
"Dispatcher port: " << dispatcher.port << std::endl;
741 label_to_proc_type_map_[dispatcher.label] =
"Dispatcher";
742 if(dispatcher.subsystem != 1)
744 o <<
"Dispatcher subsystem: " << dispatcher.subsystem << std::endl;
746 if(dispatcher.allowed_processors !=
"")
748 o <<
"Dispatcher allowed_processors: " << dispatcher.allowed_processors
753 for(
auto& rmanager : info.processes[ARTDAQTableBase::ARTDAQAppType::RoutingManager])
755 o <<
"RoutingManager host: " << rmanager.hostname << std::endl;
756 o <<
"RoutingManager label: " << rmanager.label << std::endl;
757 label_to_proc_type_map_[rmanager.label] =
"RoutingManager";
758 if(rmanager.subsystem != 1)
760 o <<
"RoutingManager subsystem: " << rmanager.subsystem << std::endl;
762 if(rmanager.allowed_processors !=
"")
764 o <<
"RoutingManager allowed_processors: " << rmanager.allowed_processors
// ---- configuration directory: removed, recreated, then filled with one symlink
// "<label>.fcl" per process pointing at that process's flat FHiCL file ----
771 thread_progress_bar_.
step();
772 set_thread_message_(
"Writing Fhicl Files");
774 __GEN_COUT__ <<
"Building configuration directory" << __E__;
// NOTE(review): the return values of mkdir() and symlink() are not checked in the
// visible code; errors from remove_all go into `ignored`.
776 boost::system::error_code ignored;
777 boost::filesystem::remove_all(ARTDAQTableBase::ARTDAQ_FCL_PATH + FAKE_CONFIG_NAME,
779 mkdir((ARTDAQTableBase::ARTDAQ_FCL_PATH + FAKE_CONFIG_NAME).c_str(), 0755);
781 for(
auto& reader : info.processes[ARTDAQTableBase::ARTDAQAppType::BoardReader])
783 symlink(ARTDAQTableBase::getFlatFHICLFilename(
784 ARTDAQTableBase::ARTDAQAppType::BoardReader, reader.label)
786 (ARTDAQTableBase::ARTDAQ_FCL_PATH + FAKE_CONFIG_NAME +
"/" +
787 reader.label +
".fcl")
790 for(
auto& builder : info.processes[ARTDAQTableBase::ARTDAQAppType::EventBuilder])
792 symlink(ARTDAQTableBase::getFlatFHICLFilename(
793 ARTDAQTableBase::ARTDAQAppType::EventBuilder, builder.label)
795 (ARTDAQTableBase::ARTDAQ_FCL_PATH + FAKE_CONFIG_NAME +
"/" +
796 builder.label +
".fcl")
799 for(
auto& logger : info.processes[ARTDAQTableBase::ARTDAQAppType::DataLogger])
801 symlink(ARTDAQTableBase::getFlatFHICLFilename(
802 ARTDAQTableBase::ARTDAQAppType::DataLogger, logger.label)
804 (ARTDAQTableBase::ARTDAQ_FCL_PATH + FAKE_CONFIG_NAME +
"/" +
805 logger.label +
".fcl")
808 for(
auto& dispatcher : info.processes[ARTDAQTableBase::ARTDAQAppType::Dispatcher])
810 symlink(ARTDAQTableBase::getFlatFHICLFilename(
811 ARTDAQTableBase::ARTDAQAppType::Dispatcher, dispatcher.label)
813 (ARTDAQTableBase::ARTDAQ_FCL_PATH + FAKE_CONFIG_NAME +
"/" +
814 dispatcher.label +
".fcl")
817 for(
auto& rmanager : info.processes[ARTDAQTableBase::ARTDAQAppType::RoutingManager])
819 symlink(ARTDAQTableBase::getFlatFHICLFilename(
820 ARTDAQTableBase::ARTDAQAppType::RoutingManager, rmanager.label)
822 (ARTDAQTableBase::ARTDAQ_FCL_PATH + FAKE_CONFIG_NAME +
"/" +
823 rmanager.label +
".fcl")
// ---- DAQInterface calls: everything from here on holds daqinterface_mutex_ ----
827 thread_progress_bar_.
step();
829 std::lock_guard<std::recursive_mutex> lk(daqinterface_mutex_);
// Configuring is only allowed from the "stopped" state (or while the state is empty).
831 if(daqinterface_state_ !=
"stopped" && daqinterface_state_ !=
"")
833 __GEN_SS__ <<
"Cannot configure DAQInterface because it is in the wrong state"
834 <<
" (" << daqinterface_state_ <<
" != stopped)!" << __E__;
838 set_thread_message_(
"Calling setdaqcomps");
839 __GEN_COUT__ <<
"Calling setdaqcomps" << __E__;
840 __GEN_COUT__ <<
"Status before setdaqcomps: " << daqinterface_state_ << __E__;
841 PyObject* pName1 = PyUnicode_FromString(
"setdaqcomps");
// Build the setdaqcomps argument: a dict mapping each BoardReader label to a list
// [host, "-1" (port), subsystem] with allowed_processors as an optional fourth entry.
843 PyObject* readerDict = PyDict_New();
844 for(
auto& reader : info.processes[ARTDAQTableBase::ARTDAQAppType::BoardReader])
846 label_to_proc_type_map_[reader.label] =
"BoardReader";
847 PyObject* readerName = PyUnicode_FromString(reader.label.c_str());
849 int list_size = reader.allowed_processors !=
"" ? 4 : 3;
// PyList_SetItem takes over the reference of each item placed into the list.
851 PyObject* readerData = PyList_New(list_size);
852 PyObject* readerHost = PyUnicode_FromString(reader.hostname.c_str());
853 PyObject* readerPort = PyUnicode_FromString(
"-1");
854 PyObject* readerSubsystem =
855 PyUnicode_FromString(std::to_string(reader.subsystem).c_str());
856 PyList_SetItem(readerData, 0, readerHost);
857 PyList_SetItem(readerData, 1, readerPort);
858 PyList_SetItem(readerData, 2, readerSubsystem);
859 if(reader.allowed_processors !=
"")
861 PyObject* readerAllowedProcessors =
862 PyUnicode_FromString(reader.allowed_processors.c_str());
863 PyList_SetItem(readerData, 3, readerAllowedProcessors);
// NOTE(review): PyDict_SetItem does not take over references; no release of readerName
// or readerData is visible here — confirm against the full file.
865 PyDict_SetItem(readerDict, readerName, readerData);
868 PyObject_CallMethodObjArgs(daqinterface_ptr_, pName1, readerDict, NULL);
869 __COUT_MULTI_LBL__(0, captureStderrAndStdout_(
"setdaqcomps"),
"setdaqcomps");
871 Py_DECREF(readerDict);
875 std::string err = capturePyErr(
"setdaqcomps");
876 __GEN_SS__ <<
"Error calling setdaqcomps transition: " << err << __E__;
881 __GEN_COUT__ <<
"Status after setdaqcomps: " << daqinterface_state_ << __E__;
// ---- do_boot with the boot.txt written above; on failure recover and retry once ----
883 thread_progress_bar_.
step();
884 set_thread_message_(
"Calling do_boot");
885 __GEN_COUT__ <<
"Calling do_boot" << __E__;
886 __GEN_COUT__ <<
"Status before boot: " << daqinterface_state_ << __E__;
887 PyObject* pName2 = PyUnicode_FromString(
"do_boot");
888 PyObject* pStateArgs1 =
889 PyUnicode_FromString((ARTDAQTableBase::ARTDAQ_FCL_PATH +
"/boot.txt").c_str());
891 PyObject_CallMethodObjArgs(daqinterface_ptr_, pName2, pStateArgs1, NULL);
892 std::string doBootOutput = captureStderrAndStdout_(
"do_boot");
893 __COUT_MULTI_LBL__(0, doBootOutput,
"do_boot");
// NOTE(review): "boost" in the log message below looks like a typo for "boot"; left
// unchanged because it is runtime text.
897 std::string err = capturePyErr();
898 __GEN_COUT__ <<
"Error on first boost attempt, recovering and retrying: " << err
901 PyObject* pName = PyUnicode_FromString(
"do_recover");
902 PyObject* res = PyObject_CallMethodObjArgs(daqinterface_ptr_, pName, NULL);
903 __COUT_MULTI_LBL__(0, captureStderrAndStdout_(
"do_recover"),
"do_recover");
// NOTE(review): here and in the two similar places below, the length check uses
// OUT_ON_ERR_SIZE but substr() uses the literal 1000; they agree only while the macro
// stays at 1000.
907 std::string err = capturePyErr();
908 __GEN_SS__ <<
"Error calling recover transition!!!! " << err << __E__;
909 if(doBootOutput.size() > OUT_ON_ERR_SIZE)
910 ss <<
"... last " << OUT_ON_ERR_SIZE
911 <<
" characters: " << doBootOutput.substr(doBootOutput.size() - 1000);
917 thread_progress_bar_.
step();
918 set_thread_message_(
"Calling do_boot (retry)");
919 __GEN_COUT__ <<
"Calling do_boot again" << __E__;
920 __GEN_COUT__ <<
"Status before boot: " << daqinterface_state_ << __E__;
922 PyObject_CallMethodObjArgs(daqinterface_ptr_, pName2, pStateArgs1, NULL);
923 doBootOutput = captureStderrAndStdout_(
"do_boot (retry)");
924 __COUT_MULTI_LBL__(0, doBootOutput,
"do_boot (retry)");
928 std::string err = capturePyErr();
929 __GEN_SS__ <<
"Error calling boot transition (2nd try): " << err << __E__;
930 if(doBootOutput.size() > OUT_ON_ERR_SIZE)
931 ss <<
"... last " << OUT_ON_ERR_SIZE
932 <<
" characters: " << doBootOutput.substr(doBootOutput.size() - 1000);
// After booting, the state must be "booted"; otherwise the tail of the captured boot
// output is attached to the error.
940 if(daqinterface_state_ !=
"booted")
942 __GEN_SS__ <<
"DAQInterface boot transition failed! "
943 <<
"Status after boot attempt: " << daqinterface_state_ << __E__;
944 if(doBootOutput.size() > OUT_ON_ERR_SIZE)
945 ss <<
"... last " << OUT_ON_ERR_SIZE
946 <<
" characters: " << doBootOutput.substr(doBootOutput.size() - 1000);
951 __GEN_COUT__ <<
"Status after boot: " << daqinterface_state_ << __E__;
// ---- do_config, called with a one-element list containing FAKE_CONFIG_NAME ----
953 thread_progress_bar_.
step();
954 set_thread_message_(
"Calling do_config");
955 __GEN_COUT__ <<
"Calling do_config" << __E__;
956 __GEN_COUT__ <<
"Status before config: " << daqinterface_state_ << __E__;
957 std::string doConfigOutput =
"";
959 PyObject* pName3 = PyUnicode_FromString(
"do_config");
960 PyObject* pStateArgs2 = Py_BuildValue(
"[s]", FAKE_CONFIG_NAME);
962 PyObject_CallMethodObjArgs(daqinterface_ptr_, pName3, pStateArgs2, NULL);
963 doConfigOutput = captureStderrAndStdout_(
"do_config");
964 __COUT_MULTI_LBL__(0, doConfigOutput,
"do_config");
967 std::string err = capturePyErr(
"do_config");
968 __GEN_SS__ <<
"Error calling config transition: " << err << __E__;
// PyUnicode_AsUTF8 may return NULL, hence the fallback to an empty string when logging.
971 const char* res_cstr = PyUnicode_AsUTF8(res3);
972 __SUP_COUTT__ <<
"do_config result=" << (res_cstr ? res_cstr :
"") << __E__;
// After configuring, the state must be "ready". On failure the captured output is
// attached; if it contains "RECOVER transition underway", the text around that marker
// is what gets reported.
976 if(daqinterface_state_ !=
"ready")
978 __GEN_SS__ <<
"DAQInterface config transition failed!" << __E__
979 <<
"Supervisor state: \"" << daqinterface_state_ <<
"\" != \"ready\" "
981 auto doConfigOutput_recover_i =
982 doConfigOutput.find(
"RECOVER transition underway");
983 if(doConfigOutput_recover_i == std::string::npos)
984 ss << doConfigOutput;
985 else if(doConfigOutput_recover_i >
987 ss <<
"... tail of " << OUT_ON_ERR_SIZE <<
" characters before recovery: "
988 << doConfigOutput.substr(
989 doConfigOutput_recover_i - OUT_ON_ERR_SIZE +
990 std::string(
"RECOVER transition underway").size(),
993 ss << doConfigOutput.substr(
995 doConfigOutput_recover_i +
996 std::string(
"RECOVER transition underway").size());
999 __GEN_COUT__ <<
"Status after config: " << daqinterface_state_ << __E__;
1001 set_thread_message_(
"Configured");
1002 __GEN_COUT__ <<
"Configured." << __E__;
// Errors do not leave this thread: they are logged and stored in thread_error_message_
// (under thread_mutex_), where transitionConfiguring() reads them.
1005 catch(
const std::runtime_error& e)
1007 set_thread_message_(
"ERROR");
1008 __SS__ <<
"Error was caught while configuring: " << e.what() << __E__;
1009 __COUT_ERR__ <<
"\n" << ss.str();
1010 std::lock_guard<std::mutex> lock(thread_mutex_);
1011 thread_error_message_ = ss.str();
1015 set_thread_message_(
"ERROR");
1016 __SS__ <<
"Unknown error was caught while configuring. Please checked the logs."
1018 __COUT_ERR__ <<
"\n" << ss.str();
1020 artdaq::ExceptionHandler(artdaq::ExceptionHandlerRethrow::no, ss.str());
1022 std::lock_guard<std::mutex> lock(thread_mutex_);
1023 thread_error_message_ = ss.str();
// NOTE(review): the header of the enclosing function is not visible in this excerpt;
// judging by the messages it is presumably the Halting transition handler — confirm.
// Visible behavior: under daqinterface_mutex_, call do_stop_running when the state is
// "running", then call do_command with "Shutdown". Errors are ignored (only logged)
// when the state machine's provenance state is FAILED or HALTED, and otherwise turned
// into a toolbox::fsm exception.
1030 set_thread_message_(
"Halting");
1031 __SUP_COUT__ <<
"Halting..." << __E__;
1032 std::lock_guard<std::recursive_mutex> lk(daqinterface_mutex_);
1034 __SUP_COUT__ <<
"Status before halt: " << daqinterface_state_ << __E__;
// A running DAQ is stopped first.
1036 if(daqinterface_state_ ==
"running")
1039 PyObject* pName = PyUnicode_FromString(
"do_stop_running");
1040 PyObject* res = PyObject_CallMethodObjArgs(daqinterface_ptr_, pName, NULL);
1042 0, captureStderrAndStdout_(
"do_stop_running"),
"do_stop_running");
1046 std::string err = capturePyErr();
1047 __SS__ <<
"Error calling DAQ Interface stop transition: " << err << __E__;
// Shut the DAQ processes down through DAQInterface's do_command("Shutdown").
1052 PyObject* pName = PyUnicode_FromString(
"do_command");
1053 PyObject* pArg = PyUnicode_FromString(
"Shutdown");
1054 PyObject* res = PyObject_CallMethodObjArgs(daqinterface_ptr_, pName, pArg, NULL);
1056 0, captureStderrAndStdout_(
"do_command Shutdown"),
"do_command Shutdown");
1060 std::string err = capturePyErr();
1061 __SS__ <<
"Error calling DAQ Interface halt transition: " << err << __E__;
1066 __SUP_COUT__ <<
"Status after halt: " << daqinterface_state_ << __E__;
1067 __SUP_COUT__ <<
"Halted." << __E__;
1068 set_thread_message_(
"Halted");
1070 catch(
const std::runtime_error& e)
1072 const std::string transitionName =
"Halting";
// Coming from the FAILED or HALTED state, a halting error is only logged.
// NOTE(review): the log text always prints FAILED_STATE_NAME, even when the provenance
// state was HALTED.
1074 if(theStateMachine_.getProvenanceStateName() ==
1075 RunControlStateMachine::FAILED_STATE_NAME ||
1076 theStateMachine_.getProvenanceStateName() ==
1077 RunControlStateMachine::HALTED_STATE_NAME)
1079 __SUP_COUT_INFO__ <<
"Error was caught while halting (but ignoring because "
1080 "previous state was '"
1081 << RunControlStateMachine::FAILED_STATE_NAME
1082 <<
"'): " << e.what() << __E__;
1086 __SUP_SS__ <<
"Error was caught while " << transitionName <<
": " << e.what()
1088 __SUP_COUT_ERR__ <<
"\n" << ss.str();
// NOTE(review): the location string names "ARTDAQSupervisorBase", which does not match
// the class name ARTDAQSupervisor used elsewhere in this file — confirm.
1089 theStateMachine_.setErrorMessage(ss.str());
1090 throw toolbox::fsm::exception::Exception(
1091 "Transition Error" ,
1093 "ARTDAQSupervisorBase::transition" + transitionName ,
1101 const std::string transitionName =
"Halting";
// Same handling for exceptions of unknown type.
1103 if(theStateMachine_.getProvenanceStateName() ==
1104 RunControlStateMachine::FAILED_STATE_NAME ||
1105 theStateMachine_.getProvenanceStateName() ==
1106 RunControlStateMachine::HALTED_STATE_NAME)
1108 __SUP_COUT_INFO__ <<
"Unknown error was caught while halting (but ignoring "
1109 "because previous state was '"
1110 << RunControlStateMachine::FAILED_STATE_NAME <<
"')." << __E__;
1114 __SUP_SS__ <<
"Unknown error was caught while " << transitionName
1115 <<
". Please checked the logs." << __E__;
1116 __SUP_COUT_ERR__ <<
"\n" << ss.str();
1117 theStateMachine_.setErrorMessage(ss.str());
1119 artdaq::ExceptionHandler(artdaq::ExceptionHandlerRethrow::no, ss.str());
1121 throw toolbox::fsm::exception::Exception(
1122 "Transition Error" ,
1124 "ARTDAQSupervisorBase::transition" + transitionName ,
// NOTE(review): the header of the enclosing function is not visible in this excerpt;
// judging by the messages it is presumably the Initializing transition handler —
// confirm. The work done between the two log statements is not visible either.
1135 set_thread_message_(
"Initializing");
1136 __SUP_COUT__ <<
"Initializing..." << __E__;
1138 __SUP_COUT__ <<
"Initialized." << __E__;
1139 set_thread_message_(
"Initialized");
// Known and unknown exceptions are each turned into an error message; what is done
// with the message afterwards is not visible here.
1141 catch(
const std::runtime_error& e)
1143 __SS__ <<
"Error was caught while Initializing: " << e.what() << __E__;
1148 __SS__ <<
"Unknown error was caught while Initializing. Please checked the logs."
1150 artdaq::ExceptionHandler(artdaq::ExceptionHandlerRethrow::no, ss.str());
// State-machine handler for the Pausing transition (the event argument is unnamed and
// unused in the visible code).
// Under daqinterface_mutex_, calls DAQInterface's do_command with the argument "Pause",
// logs the captured Python output, and builds an error message if the call fails.
// NOTE(review): braces, the try statement, result checks and throw statements are not
// visible in this excerpt; no release of pName/pArg/res is visible either.
1155 void ARTDAQSupervisor::transitionPausing(toolbox::Event::Reference )
1158 set_thread_message_(
"Pausing");
1159 __SUP_COUT__ <<
"Pausing..." << __E__;
1160 std::lock_guard<std::recursive_mutex> lk(daqinterface_mutex_);
1163 __SUP_COUT__ <<
"Status before pause: " << daqinterface_state_ << __E__;
1165 PyObject* pName = PyUnicode_FromString(
"do_command");
1166 PyObject* pArg = PyUnicode_FromString(
"Pause");
1167 PyObject* res = PyObject_CallMethodObjArgs(daqinterface_ptr_, pName, pArg, NULL);
1169 0, captureStderrAndStdout_(
"do_command Pause"),
"do_command Pause");
1173 std::string err = capturePyErr();
1174 __SS__ <<
"Error calling DAQ Interface Pause transition: " << err << __E__;
1179 __SUP_COUT__ <<
"Status after pause: " << daqinterface_state_ << __E__;
1181 __SUP_COUT__ <<
"Paused." << __E__;
1182 set_thread_message_(
"Paused");
// Known and unknown exceptions are each turned into an error message.
1184 catch(
const std::runtime_error& e)
1186 __SS__ <<
"Error was caught while Pausing: " << e.what() << __E__;
1191 __SS__ <<
"Unknown error was caught while Pausing. Please checked the logs." << __E__;
1192 artdaq::ExceptionHandler(artdaq::ExceptionHandlerRethrow::no, ss.str());
// State-machine handler for the Resuming transition (the event argument is unnamed and
// unused in the visible code).
// Under daqinterface_mutex_, calls DAQInterface's do_command with the argument
// "Resume", logs the captured Python output, and builds an error message if the call
// fails.
// NOTE(review): braces, the try statement, result checks and throw statements are not
// visible in this excerpt; no release of pName/pArg/res is visible either.
1197 void ARTDAQSupervisor::transitionResuming(toolbox::Event::Reference )
1200 set_thread_message_(
"Resuming");
1201 __SUP_COUT__ <<
"Resuming..." << __E__;
1202 std::lock_guard<std::recursive_mutex> lk(daqinterface_mutex_);
1205 __SUP_COUT__ <<
"Status before resume: " << daqinterface_state_ << __E__;
1206 PyObject* pName = PyUnicode_FromString(
"do_command");
1207 PyObject* pArg = PyUnicode_FromString(
"Resume");
1208 PyObject* res = PyObject_CallMethodObjArgs(daqinterface_ptr_, pName, pArg, NULL);
1210 0, captureStderrAndStdout_(
"do_command Resume"),
"do_command Resume");
1214 std::string err = capturePyErr();
1215 __SS__ <<
"Error calling DAQ Interface Resume transition: " << err << __E__;
1219 __SUP_COUT__ <<
"Status after resume: " << daqinterface_state_ << __E__;
1220 __SUP_COUT__ <<
"Resumed." << __E__;
1221 set_thread_message_(
"Resumed");
// Known and unknown exceptions are each turned into an error message.
1223 catch(
const std::runtime_error& e)
1225 __SS__ <<
"Error was caught while Resuming: " << e.what() << __E__;
1230 __SS__ <<
"Unknown error was caught while Resuming. Please checked the logs."
1232 artdaq::ExceptionHandler(artdaq::ExceptionHandlerRethrow::no, ss.str());
// Run-control handler for the Starting transition. The work itself is done by
// startingThread on a detached std::thread; this handler launches that thread when both
// the iteration index and the sub-iteration index are 0, and otherwise inspects
// thread_error_message_ and thread_progress_bar_ to decide whether to fail, keep
// iterating or finish. The event argument is unnamed and unused.
// NOTE(review): the embedded source line numbers skip in several places, so braces, any
// else or completion test, the mutex passed to the lock_guard and the remaining
// arguments of the thrown exception are not visible here - confirm against the
// complete file.
1237 void ARTDAQSupervisor::transitionStarting(toolbox::Event::Reference )
1240 __SUP_COUT__ <<
"transitionStarting" << __E__;
// First pass: clear the shared error text, reset the progress bar, stamp the time of the
// last progress update, start the worker thread detached and request further iterations.
1243 if(RunControlStateMachine::getIterationIndex() == 0 &&
1244 RunControlStateMachine::getSubIterationIndex() == 0)
1246 thread_error_message_ =
"";
1247 thread_progress_bar_.resetProgressBar(0);
1248 last_thread_progress_update_ = time(0);
1251 std::thread(&ARTDAQSupervisor::startingThread,
this).detach();
1253 __SUP_COUT__ <<
"Starting thread started." << __E__;
1255 RunControlStateMachine::
1256 indicateIterationWork();
// Later passes (presumably the else branch - TODO confirm): copy the worker error text
// while a std::mutex lock is held, then read the current progress value.
1260 std::string errorMessage;
1262 std::lock_guard<std::mutex> lock(
1264 errorMessage = thread_error_message_;
1266 int progress = thread_progress_bar_.
read();
1267 __SUP_COUTV__(errorMessage);
1268 __SUP_COUTV__(progress);
1269 __SUP_COUTV__(thread_progress_bar_.
isComplete());
// Watchdog: with no error reported and more than 600 seconds since the last recorded
// progress change, synthesize an error message.
1272 if(errorMessage ==
"" &&
1273 time(0) - last_thread_progress_update_ > 600)
1275 __SUP_SS__ <<
"There has been no update from the start thread for "
1276 << (time(0) - last_thread_progress_update_)
1277 <<
" seconds, assuming something is wrong and giving up! "
1278 <<
"Last progress received was " << progress << __E__;
1279 errorMessage = ss.str();
// Any error text (from the worker or from the watchdog) is logged, stored on the state
// machine and raised as a toolbox::fsm::exception::Exception.
1282 if(errorMessage !=
"")
1284 __SUP_SS__ <<
"Error was caught in starting thread: " << errorMessage
1286 __SUP_COUT_ERR__ <<
"\n" << ss.str();
1288 theStateMachine_.setErrorMessage(ss.str());
1289 throw toolbox::fsm::exception::Exception(
1290 "Transition Error" ,
1292 "CoreSupervisorBase::transitionStarting" ,
// Request another iteration (presumably only while the progress bar is not complete -
// the guarding condition is not visible).
1300 RunControlStateMachine::
1301 indicateIterationWork();
// Remember the progress value and refresh the watchdog timestamp whenever it changed.
1303 if(last_thread_progress_read_ != progress)
1305 last_thread_progress_read_ = progress;
1306 last_thread_progress_update_ = time(0);
1313 __SUP_COUT_INFO__ <<
"Complete starting transition!" << __E__;
1314 __SUP_COUTV__(getProcessInfo_());
// Handler for std::runtime_error: wraps e.what() in a starting-specific message.
1321 catch(
const std::runtime_error& e)
1323 __SS__ <<
"Error was caught while Starting: " << e.what() << __E__;
// Message for any other failure (presumably inside a catch-all handler - not visible);
// artdaq::ExceptionHandler is called with ExceptionHandlerRethrow::no.
1328 __SS__ <<
"Unknown error was caught while Starting. Please checked the logs."
1330 artdaq::ExceptionHandler(artdaq::ExceptionHandlerRethrow::no, ss.str());
// Worker body launched by transitionStarting on a detached thread. It reads the
// RunNumber parameter of the current state machine message, calls the Python method
// do_start_running with that number on daqinterface_ptr_, and requires
// daqinterface_state_ to equal running afterwards. Progress is reported by stepping
// thread_progress_bar_; failures are reported by writing thread_error_message_ while
// thread_mutex_ is held.
// NOTE(review): the embedded source line numbers skip in several places, so braces, the
// end of the uid lookup chain, the variable receiving the Python call result, the guard
// on the error branch and the throw statements are not visible here - confirm against
// the complete file.
// NOTE(review): std::stoi throws std::invalid_argument or std::out_of_range, neither of
// which derives from std::runtime_error, so a malformed RunNumber would bypass the first
// handler - confirm it is covered by the second one.
1335 void ARTDAQSupervisor::startingThread()
// Configuration tree lookup under the application table, keyed by the supervisor UID.
1338 std::string uid = theConfigurationManager_
1339 ->
getNode(ConfigurationManager::XDAQ_APPLICATION_TABLE_NAME +
1340 "/" + CorePropertySupervisorBase::getSupervisorUID() +
1341 "/" +
"LinkToSupervisorTable")
1344 __COUT__ <<
"Supervisor uid is " << uid <<
", getting supervisor table node" << __E__;
// Local named mfSubject_ (presumably consumed by the __GEN_COUT__ macros - TODO confirm).
1345 const std::string mfSubject_ = supervisorClassNoNamespace_ +
"-" + uid;
1346 __GEN_COUT__ <<
"Starting..." << __E__;
1347 set_thread_message_(
"Starting");
1349 thread_progress_bar_.
step();
// Recursive mutex: held until this scope ends.
1352 std::lock_guard<std::recursive_mutex> lk(daqinterface_mutex_);
1354 __GEN_COUT__ <<
"Status before start: " << daqinterface_state_ << __E__;
// Extract the run number text from the current state machine message.
1355 auto runNumber = SOAPUtilities::translate(theStateMachine_.getCurrentMessage())
1357 .getValue(
"RunNumber");
1359 thread_progress_bar_.
step();
// Convert the run number to a Python integer and invoke do_start_running with it.
1361 PyObject* pName = PyUnicode_FromString(
"do_start_running");
1362 int run_number = std::stoi(runNumber);
1363 PyObject* pStateArgs = PyLong_FromLong(run_number);
1365 PyObject_CallMethodObjArgs(daqinterface_ptr_, pName, pStateArgs, NULL);
// Captured Python output for this call, handed to a call whose opening line is not shown.
1367 0, captureStderrAndStdout_(
"do_start_running"),
"do_start_running");
1369 thread_progress_bar_.
step();
// Error branch (presumably taken when the call result is NULL - TODO confirm).
1373 std::string err = capturePyErr();
1374 __SS__ <<
"Error calling start transition: " << err << __E__;
1379 thread_progress_bar_.
step();
1381 __GEN_COUT__ <<
"Status after start: " << daqinterface_state_ << __E__;
// Any state other than running after the call is treated as a failed start.
1382 if(daqinterface_state_ !=
"running")
1384 __SS__ <<
"DAQInterface start transition failed!" << __E__;
1388 thread_progress_bar_.
step();
1391 set_thread_message_(
"Started");
1392 thread_progress_bar_.
step();
1394 __GEN_COUT__ <<
"Started." << __E__;
// Handler for std::runtime_error: log the message, then publish it through
// thread_error_message_ under thread_mutex_ so transitionStarting can read it.
1398 catch(
const std::runtime_error& e)
1400 __SS__ <<
"Error was caught while Starting: " << e.what() << __E__;
1401 __COUT_ERR__ <<
"\n" << ss.str();
1402 std::lock_guard<std::mutex> lock(thread_mutex_);
1403 thread_error_message_ = ss.str();
// Message for any other failure (presumably inside a catch-all handler - not visible):
// log it, pass it to artdaq::ExceptionHandler with ExceptionHandlerRethrow::no, then
// publish it the same way as above.
1407 __SS__ <<
"Unknown error was caught while Starting. Please checked the logs."
1409 __COUT_ERR__ <<
"\n" << ss.str();
1411 artdaq::ExceptionHandler(artdaq::ExceptionHandlerRethrow::no, ss.str());
1413 std::lock_guard<std::mutex> lock(thread_mutex_);
1414 thread_error_message_ = ss.str();
// Run-control handler for the Stopping transition. Publishes the thread message
// Stopping, takes daqinterface_mutex_, calls the Python method do_stop_running (no
// arguments) on daqinterface_ptr_, logs daqinterface_state_ before and after the call,
// and finally publishes the thread message Stopped. The event argument is unnamed and
// unused.
// NOTE(review): the embedded source line numbers skip in several places, so braces, the
// condition guarding the error branch and the opening of the second exception handler
// are not visible here - confirm against the complete file.
// NOTE(review): no Py_DECREF for pName or res is visible in this listing - verify
// reference handling in the complete file.
1418 void ARTDAQSupervisor::transitionStopping(toolbox::Event::Reference )
1421 __SUP_COUT__ <<
"Stopping..." << __E__;
1422 set_thread_message_(
"Stopping");
// Recursive mutex: held until this scope ends.
1423 std::lock_guard<std::recursive_mutex> lk(daqinterface_mutex_);
1425 __SUP_COUT__ <<
"Status before stop: " << daqinterface_state_ << __E__;
1426 PyObject* pName = PyUnicode_FromString(
"do_stop_running");
1427 PyObject* res = PyObject_CallMethodObjArgs(daqinterface_ptr_, pName, NULL);
// Log the Python output captured during the call, labelled with the method name.
1428 __COUT_MULTI_LBL__(0, captureStderrAndStdout_(
"do_stop_running"),
"do_stop_running");
// Error branch (presumably taken when res is NULL - TODO confirm).
1432 std::string err = capturePyErr();
1433 __SS__ <<
"Error calling DAQ Interface stop transition: " << err << __E__;
1437 __SUP_COUT__ <<
"Status after stop: " << daqinterface_state_ << __E__;
1438 __SUP_COUT__ <<
"Stopped." << __E__;
1439 set_thread_message_(
"Stopped");
// Handler for std::runtime_error: wraps e.what() in a stopping-specific message.
1441 catch(
const std::runtime_error& e)
1443 __SS__ <<
"Error was caught while Stopping: " << e.what() << __E__;
// Message for any other failure (presumably inside a catch-all handler - not visible);
// artdaq::ExceptionHandler is called with ExceptionHandlerRethrow::no.
1448 __SS__ <<
"Unknown error was caught while Stopping. Please checked the logs."
1450 artdaq::ExceptionHandler(artdaq::ExceptionHandlerRethrow::no, ss.str());
// Handler run when the state machine enters its error state. Takes daqinterface_mutex_,
// calls the Python method do_recover (no arguments) on daqinterface_ptr_ and logs
// daqinterface_state_ before and after the call. The event argument is unnamed and
// unused. No exception handler is visible in this block.
// NOTE(review): the embedded source line numbers skip in several places, so braces and
// the condition guarding the error branch are not visible here - confirm against the
// complete file.
1455 void ots::ARTDAQSupervisor::enteringError(toolbox::Event::Reference )
1457 __SUP_COUT__ <<
"Entering error recovery state" << __E__;
// Recursive mutex: held until this scope ends.
1458 std::lock_guard<std::recursive_mutex> lk(daqinterface_mutex_);
1460 __SUP_COUT__ <<
"Status before error: " << daqinterface_state_ << __E__;
1462 PyObject* pName = PyUnicode_FromString(
"do_recover");
1463 PyObject* res = PyObject_CallMethodObjArgs(daqinterface_ptr_, pName, NULL);
// Log the Python output captured during the call, labelled with the method name.
1464 __COUT_MULTI_LBL__(0, captureStderrAndStdout_(
"do_recover"),
"do_recover");
// Error branch (presumably taken when res is NULL - TODO confirm).
1468 std::string err = capturePyErr();
1469 __SS__ <<
"Error calling DAQ Interface recover transition: " << err << __E__;
1473 __SUP_COUT__ <<
"Status after error: " << daqinterface_state_ << __E__;
1474 __SUP_COUT__ <<
"EnteringError DONE." << __E__;
// Builds one SupervisorInfo::SubappInfo entry per process returned by
// getAndParseProcessInfo_ and collects them in a vector.
// Per entry: name is the process label, detail combines rank and subsystem, the status
// is the process state mapped through artdaqStateToOtsState, the url is assembled from
// host and port, and class_name is the process type from labelToProcType_ prefixed with
// ARTDAQ. progress is always set to 100 and lastStatusTime to the current time.
// NOTE(review): the declaration of info and the return statement are not visible in
// this listing - confirm against the complete file.
1478 std::vector<SupervisorInfo::SubappInfo> ots::ARTDAQSupervisor::getSubappInfo(
void)
1480 auto apps = getAndParseProcessInfo_();
1481 std::vector<SupervisorInfo::SubappInfo> output;
1482 for(
auto& app : apps)
1486 info.
name = app.label;
1487 info.detail =
"Rank " + std::to_string(app.rank) +
", subsystem " +
1488 std::to_string(app.subsystem);
1489 info.lastStatusTime = time(0);
1490 info.progress = 100;
1491 info.status = artdaqStateToOtsState(app.state);
1492 info.url =
"http://" + app.host +
":" + std::to_string(app.port) +
"/RPC2";
1493 info.class_name =
"ARTDAQ " + labelToProcType_(app.label);
1495 output.push_back(info);
// Reads the text accumulated in the Python object stringIO_err (via its getvalue
// method), formats it into a message that includes label, and then rewinds and
// truncates that object through its seek and truncate methods.
// NOTE(review): the conditions selecting between the two messages, the origin of
// stringIO_err and the return statement are not visible in this listing - confirm
// against the complete file.
// NOTE(review): label is concatenated directly with the following text, with no
// separating space, so the two run together in the resulting message.
// NOTE(review): no Py_DECREF for err_text, r1 or r2 is visible in this listing.
1501 std::string ots::ARTDAQSupervisor::capturePyErr(std::string label )
1507 PyObject* err_text = PyObject_CallMethod(stringIO_err,
"getvalue", NULL);
1508 std::string err =
"";
// Message used when the captured text could not be obtained (guard not visible).
1510 err =
"Capture of " + label +
"PyErr failed.";
// Message carrying the captured text converted to UTF-8.
1512 err =
"Capture of " + label +
"PyErr: " + std::string(PyUnicode_AsUTF8(err_text));
// Reset the capture object: seek to offset 0, then truncate.
1516 PyObject* r1 = PyObject_CallMethod(stringIO_err,
"seek",
"i", 0);
1518 PyObject* r2 = PyObject_CallMethod(stringIO_err,
"truncate", NULL);
// Collects the text accumulated in the Python objects stringIO_out and stringIO_err
// (via their getvalue methods), formats each into a labelled section, resets both
// objects with seek and truncate, and returns the stderr section followed by the stdout
// section.
// NOTE(review): the conditions around the empty, failed and non-empty cases and the
// origin of stringIO_out and stringIO_err are not visible in this listing - confirm
// against the complete file.
// NOTE(review): label is concatenated directly with the following text, with no
// separating space, so the two run together in the resulting message.
// NOTE(review): no Py_DECREF for out_text, err_text, r1 or r2 is visible in this listing.
1525 std::string ots::ARTDAQSupervisor::captureStderrAndStdout_(std::string label )
1530 std::string outString =
"";
1532 PyObject* out_text = PyObject_CallMethod(stringIO_out,
"getvalue", NULL);
// Non-empty stdout text becomes a labelled section. The ternary inside the branch
// re-tests out_cstr, which the enclosing if has already checked.
1537 const char* out_cstr = PyUnicode_AsUTF8(out_text);
1538 if(out_cstr && strlen(out_cstr))
1539 outString =
"Captured " + label +
"stdout:\n" +
1540 std::string(out_cstr ? out_cstr :
"") +
"\n";
// Alternative stdout message (presumably the else branch - TODO confirm).
1542 outString =
"Captured " + label +
"stdout empty.\n";
1545 std::string errString =
"";
1546 PyObject* err_text = PyObject_CallMethod(stringIO_err,
"getvalue", NULL);
// Logged when the stderr text could not be obtained (guard not visible).
1548 __SUP_COUT__ <<
"Capture of " << label <<
"stderr failed.";
// Non-empty stderr text becomes a labelled section, mirroring the stdout handling.
1551 const char* err_cstr = PyUnicode_AsUTF8(err_text);
1552 if(err_cstr && strlen(err_cstr))
1553 errString =
"Captured " + label +
"stderr:\n" +
1554 std::string(err_cstr ? err_cstr :
"") +
"\n";
// Alternative stderr message (presumably the else branch - TODO confirm).
1556 errString =
"Captured " + label +
"stderr empty.\n";
// Reset the stdout capture object: seek to offset 0, then truncate.
1561 PyObject* r1 = PyObject_CallMethod(stringIO_out,
"seek",
"i", 0);
1563 PyObject* r2 = PyObject_CallMethod(stringIO_out,
"truncate", NULL);
// Reset the stderr capture object the same way (r1 and r2 are declared again, so this
// is presumably a separate scope - braces not visible).
1567 PyObject* r1 = PyObject_CallMethod(stringIO_err,
"seek",
"i", 0);
1569 PyObject* r2 = PyObject_CallMethod(stringIO_err,
"truncate", NULL);
// stderr section first, stdout section second.
1574 return errString + outString;
// Refreshes daqinterface_state_ from the Python side. Takes daqinterface_mutex_, sets
// the state to an empty string when daqinterface_ptr_ is null, and otherwise calls the
// Python method state with the argument DAQInterface and stores the result converted
// to UTF-8.
// NOTE(review): the error message references a variable named tries, so a retry loop
// presumably surrounds the call, but neither the loop nor the guard on the error branch
// is visible in this listing - confirm against the complete file.
// NOTE(review): PyUnicode_AsUTF8 can return NULL; constructing a std::string from that
// would be undefined behavior - confirm a guard exists in the complete file.
1578 void ots::ARTDAQSupervisor::getDAQState_()
// Recursive mutex: held until this scope ends.
1581 std::lock_guard<std::recursive_mutex> lk(daqinterface_mutex_);
1583 if(daqinterface_ptr_ ==
nullptr)
1585 daqinterface_state_ =
"";
1592 PyObject* pName = PyUnicode_FromString(
"state");
1593 PyObject* pArg = PyUnicode_FromString(
"DAQInterface");
1594 PyObject* res = PyObject_CallMethodObjArgs(daqinterface_ptr_, pName, pArg, NULL);
// Error branch: record the Python error, clear the cached state and log the message.
1598 std::string err = capturePyErr(
"getDAQState_");
1599 __SS__ <<
"Retry n " << tries
1600 <<
". Error calling state function from getDAQState_() - here was the "
1606 daqinterface_state_ =
"";
1607 __COUT_ERR__ << ss.str();
// Success path: cache the returned state text.
1612 daqinterface_state_ = std::string(PyUnicode_AsUTF8(res));
1613 __SUP_COUTS__(2) <<
"getDAQState_ state=" << daqinterface_state_ << __E__;
// Returns the text produced by the Python method artdaq_process_info, called on
// daqinterface_ptr_ with the arguments DAQInterface and a Python boolean true, while
// daqinterface_mutex_ is held.
// NOTE(review): the action taken when daqinterface_ptr_ is null, the variable receiving
// the call result and the guard on the error branch are not visible in this listing -
// confirm against the complete file.
// NOTE(review): PyUnicode_AsUTF8 can return NULL; constructing a std::string from that
// would be undefined behavior - confirm a guard exists in the complete file.
1620 std::string ots::ARTDAQSupervisor::getProcessInfo_(
void)
// Recursive mutex: held until this scope ends.
1623 std::lock_guard<std::recursive_mutex> lk(daqinterface_mutex_);
1625 if(daqinterface_ptr_ ==
nullptr)
1630 PyObject* pName = PyUnicode_FromString(
"artdaq_process_info");
1631 PyObject* pArg = PyUnicode_FromString(
"DAQInterface");
1632 PyObject* pArg2 = PyBool_FromLong(
true);
1634 PyObject_CallMethodObjArgs(daqinterface_ptr_, pName, pArg, pArg2, NULL);
// Error branch (presumably taken when the call result is NULL - TODO confirm).
1638 std::string err = capturePyErr();
1639 __SS__ <<
"Error calling artdaq_process_info function: " << err << __E__;
1643 return std::string(PyUnicode_AsUTF8(res));
1647 std::string ots::ARTDAQSupervisor::artdaqStateToOtsState(std::string state)
1649 if(state ==
"nonexistant")
1650 return RunControlStateMachine::INITIAL_STATE_NAME;
1651 if(state ==
"Ready")
1652 return "Configured";
1653 if(state ==
"Running")
1654 return RunControlStateMachine::RUNNING_STATE_NAME;
1655 if(state ==
"Paused")
1656 return RunControlStateMachine::PAUSED_STATE_NAME;
1657 if(state ==
"Stopped")
1658 return RunControlStateMachine::HALTED_STATE_NAME;
1660 TLOG(TLVL_WARNING) <<
"Unrecognized state name " << state;
1661 return RunControlStateMachine::FAILED_STATE_NAME;
// Looks up the process type recorded for label in label_to_proc_type_map_ and returns
// it when the label is present.
// NOTE(review): the value returned for an unknown label is not visible in this listing -
// confirm against the complete file.
1664 std::string ots::ARTDAQSupervisor::labelToProcType_(std::string label)
1666 if(label_to_proc_type_map_.count(label))
1668 return label_to_proc_type_map_[label];
// Fetches the process report from getProcessInfo_, splits it with tokenize_ and parses
// each piece with a regular expression. A piece must match the whole pattern
// (std::regex_match); the six capture groups populate, in order, label, host, port,
// subsystem, rank and state of a DAQInterfaceProcessInfo that is appended to the list.
// port, subsystem and rank are converted with std::stoi.
// NOTE(review): the declaration of match, the handling of pieces that do not match and
// the return statement are not visible in this listing - confirm against the complete
// file.
// NOTE(review): the info declared inside the loop has the same name as the info holding
// the report text declared before the loop.
1674 std::list<ots::ARTDAQSupervisor::DAQInterfaceProcessInfo>
1675 ots::ARTDAQSupervisor::getAndParseProcessInfo_()
1677 std::list<ots::ARTDAQSupervisor::DAQInterfaceProcessInfo> output;
1678 auto info = getProcessInfo_();
1679 auto procs = tokenize_(info);
// Expected shape: label at host:port (subsystem N, rank N): state
1688 std::regex re(
"(.*?) at ([^:]*):(\\d+) \\(subsystem (\\d+), rank (\\d+)\\): (.*)");
1690 for(
auto& proc : procs)
1693 if(std::regex_match(proc, match, re))
1695 DAQInterfaceProcessInfo info;
1697 info.label = match[1];
1698 info.host = match[2];
1699 info.port = std::stoi(match[3]);
1700 info.subsystem = std::stoi(match[4]);
1701 info.rank = std::stoi(match[5]);
1702 info.state = match[6];
1704 output.push_back(info);
// Creates one artdaq commander per process returned by getAndParseProcessInfo_ and
// pairs it with that process description. Each commander is built by
// artdaq::MakeCommanderPlugin from a parameter set holding commanderPluginType xmlrpc,
// id set to the process port and server_url set to the process host.
// NOTE(review): the first line of the return type, the declaration of output and the
// return statement are not visible in this listing - confirm against the complete file.
// NOTE(review): cm is declared inside the loop body; if the created commander keeps a
// reference to it, that reference would outlive cm - confirm against the
// MakeCommanderPlugin contract.
1712 std::unique_ptr<artdaq::CommanderInterface>>>
1713 ots::ARTDAQSupervisor::makeCommandersFromProcessInfo()
1716 std::pair<DAQInterfaceProcessInfo, std::unique_ptr<artdaq::CommanderInterface>>>
1718 auto infos = getAndParseProcessInfo_();
1720 for(
auto& info : infos)
1722 artdaq::Commandable cm;
1723 fhicl::ParameterSet ps;
1725 ps.put<std::string>(
"commanderPluginType",
"xmlrpc");
1726 ps.put<
int>(
"id", info.port);
1727 ps.put<std::string>(
"server_url", info.host);
// info is moved into the pair, so the element left in infos is in a moved-from state.
1729 output.emplace_back(std::make_pair<DAQInterfaceProcessInfo,
1730 std::unique_ptr<artdaq::CommanderInterface>>(
1731 std::move(info), artdaq::MakeCommanderPlugin(ps, cm)));
// Splits input at newline characters into a list of substrings. While pos is a valid
// offset inside input, the next newline at or after pos is searched for: when one is
// found the text between pos and that newline is appended, otherwise the remainder of
// input starting at pos is appended.
// NOTE(review): the declaration and updates of pos and the return statement are not
// visible in this listing - confirm against the complete file.
1738 std::list<std::string> ots::ARTDAQSupervisor::tokenize_(std::string
const& input)
1741 std::list<std::string> output;
1743 while(pos != std::string::npos && pos < input.size())
1745 auto newpos = input.find(
'\n', pos);
1746 if(newpos != std::string::npos)
// Substring constructor: newpos - pos characters of input starting at pos.
1748 output.emplace_back(input, pos, newpos - pos);
// Substring constructor: everything from pos to the end of input.
1754 output.emplace_back(input, pos);
// Body of the runner thread. It loops while runner_running_ is true; on each pass with
// a non-null daqinterface_ptr_ it takes daqinterface_mutex_, remembers the current
// daqinterface_state_ and, when that state is running, ready or booted, calls the
// Python method check_proc_heartbeats on daqinterface_ptr_. A failed call, a caught
// exception or a change of daqinterface_state_ relative to the remembered value sets
// runner_running_ to false and builds an error message.
// NOTE(review): the embedded source line numbers skip in several places, so braces, the
// try keyword, the variable receiving the call result, the state refresh, what is done
// with each error message, the catch-all clause and any wait between passes are not
// visible here - confirm against the complete file.
// NOTE(review): daqinterface_ptr_ is compared with NULL before daqinterface_mutex_ is
// taken.
1763 void ots::ARTDAQSupervisor::daqinterfaceRunner_()
1765 TLOG(TLVL_TRACE) <<
"Runner thread starting";
1766 runner_running_ =
true;
1767 while(runner_running_)
1769 if(daqinterface_ptr_ != NULL)
1771 std::unique_lock<std::recursive_mutex> lk(daqinterface_mutex_);
// Remember the state so a change can be detected further down.
1773 std::string state_before = daqinterface_state_;
// The heartbeat check is only made in these three states.
1775 if(daqinterface_state_ ==
"running" || daqinterface_state_ ==
"ready" ||
1776 daqinterface_state_ ==
"booted")
1780 TLOG(TLVL_TRACE) <<
"Calling DAQInterface::check_proc_heartbeats";
1781 PyObject* pName = PyUnicode_FromString(
"check_proc_heartbeats");
1783 PyObject_CallMethodObjArgs(daqinterface_ptr_, pName, NULL);
// Log the Python output captured during the call, labelled with the method name.
1784 __COUT_MULTI_LBL__(1,
1785 captureStderrAndStdout_(
"check_proc_heartbeats"),
1786 "check_proc_heartbeats");
1788 <<
"Done with DAQInterface::check_proc_heartbeats call";
// Error branch (presumably taken when the call result is NULL - TODO confirm): stop the
// loop and build a message from the captured Python error.
1792 runner_running_ =
false;
1793 std::string err = capturePyErr(
"check_proc_heartbeats");
1794 __SS__ <<
"Error calling check_proc_heartbeats function: " << err
// cet::exception handler: stop the loop and include explain_self() in the message.
1800 catch(cet::exception& ex)
1802 runner_running_ =
false;
1803 std::string err = capturePyErr(
"check_proc_heartbeats");
1804 __SS__ <<
"An cet::exception occurred while calling "
1805 "check_proc_heartbeats function "
1806 << ex.explain_self() <<
": " << err << __E__;
// std::exception handler: stop the loop and include what() in the message.
1810 catch(std::exception& ex)
1812 runner_running_ =
false;
1813 std::string err = capturePyErr(
"check_proc_heartbeats");
1814 __SS__ <<
"An std::exception occurred while calling "
1815 "check_proc_heartbeats function: "
1816 << ex.what() <<
"\n\n"
// Handling for any other failure (presumably a catch-all handler - not visible).
1823 runner_running_ =
false;
1824 std::string err = capturePyErr(
"check_proc_heartbeats");
1825 __SS__ <<
"An unknown Error occurred while calling "
1826 "check_proc_heartbeats function: "
// A state different from the remembered one is treated as an unexpected change.
1834 if(daqinterface_state_ != state_before)
1836 runner_running_ =
false;
1838 __SS__ <<
"DAQInterface state unexpectedly changed from "
1839 << state_before <<
" to " << daqinterface_state_
1840 <<
". Check supervisor log file for more info!" << __E__;
// Final bookkeeping: clear the flag and log completion.
1852 runner_running_ =
false;
1853 TLOG(TLVL_TRACE) <<
"Runner thread complete";
1857 void ots::ARTDAQSupervisor::stop_runner_()
1859 runner_running_ =
false;
1860 if(runner_thread_ && runner_thread_->joinable())
1862 runner_thread_->join();
1863 runner_thread_.reset(
nullptr);
// Creates a std::thread that runs daqinterfaceRunner_ on this object, owned through a
// std::unique_ptr.
// NOTE(review): the statements before the make_unique call and the target of the
// assignment (presumably runner_thread_ - TODO confirm) are not visible in this listing.
1868 void ots::ARTDAQSupervisor::start_runner_()
1872 std::make_unique<std::thread>(&ots::ARTDAQSupervisor::daqinterfaceRunner_,
this);
virtual void transitionHalting(toolbox::Event::Reference event) override
virtual void transitionInitializing(toolbox::Event::Reference event) override
void loadTableGroup(const std::string &tableGroupName, const TableGroupKey &tableGroupKey, bool doActivate=false, std::map< std::string, TableVersion > *groupMembers=0, ProgressBar *progressBar=0, std::string *accumulateWarnings=0, std::string *groupComment=0, std::string *groupAuthor=0, std::string *groupCreateTime=0, bool doNotLoadMember=false, std::string *groupTypeString=0, std::map< std::string, std::string > *groupAliases=0, ConfigurationManager::LoadGroupType groupTypeToLoad=ConfigurationManager::LoadGroupType::ALL_TYPES, bool ignoreVersionTracking=false)
ConfigurationTree getNode(const std::string &nodeString, bool doNotThrowOnBrokenUIDLinks=false) const
"root/parent/parent/"
ConfigurationTree getNode(const std::string &nodeName, bool doNotThrowOnBrokenUIDLinks=false) const
navigating between nodes
const std::string & getValueAsString(bool returnLinkTableValue=false) const
void getValue(T &value) const
ITRACEController * theTRACEController_
only define for an app that receives a command
bool isComplete()
get functions
int read()
if stepsToComplete==0, then define any progress as 50%, thread safe
void complete()
declare complete, thread safe
void INIT_MF(const char *name)
static std::string stackTrace(void)
std::string name
Also key in map.