otsdaq  3.09.00
ARTDAQSupervisorTRACEController.cc
1 #include "otsdaq/ARTDAQSupervisor/ARTDAQSupervisorTRACEController.h"
2 
#include <cstdint>
#include <ctime>
#include <exception>
#include <set>
#include <sstream>
#include <string>
#include <vector>
8 
9 ots::ARTDAQSupervisorTRACEController::ARTDAQSupervisorTRACEController() {}
10 
11 //==============================================================================
12 // parseOtsTraceLevels (file-local helper)
13 static size_t parseOtsTraceLevels(const std::string& otsOutput,
14  ots::ITRACEController::HostTraceLevelMap& outMap)
15 {
16  static const std::string hostTag = "#OTSTRACE-HOST ";
17  std::set<std::string> hostsSeen;
18  std::istringstream stream(otsOutput);
19  std::string line;
20  std::string curKey;
21 
22  while(std::getline(stream, line))
23  {
24  if(line.rfind(hostTag, 0) == 0)
25  {
26  std::string host = line.substr(hostTag.size());
27  while(!host.empty() &&
28  (host.back() == '\r' || host.back() == ' ' || host.back() == '\t'))
29  host.pop_back();
30  // Normalize: strip -data/-ipmi network suffixes so hosts merge
31  // with addTraceLevelsForThisHost() which uses the plain hostname.
32  auto pos = host.find("-data");
33  if(pos != std::string::npos)
34  host.erase(pos, 5);
35  pos = host.find("-ipmi");
36  if(pos != std::string::npos)
37  host.erase(pos, 5);
38  curKey = host;
39  if(!curKey.empty())
40  hostsSeen.insert(curKey);
41  continue;
42  }
43  if(line.rfind("#OTSTRACE", 0) == 0)
44  {
45  if(line.rfind("#OTSTRACE-END", 0) == 0)
46  curKey = "";
47  continue;
48  }
49  if(curKey.empty())
50  continue;
51 
52  std::istringstream iss(line);
53  std::string name, sM, sS, sT;
54  if(!(iss >> name >> sM >> sS >> sT))
55  continue;
56  try
57  {
58  uint64_t M = std::stoull(sM, nullptr, 0);
59  uint64_t S = std::stoull(sS, nullptr, 0);
60  uint64_t T = std::stoull(sT, nullptr, 0);
61  outMap[curKey][name].M = M;
62  outMap[curKey][name].S = S;
63  outMap[curKey][name].T = T;
64  }
65  catch(...)
66  {
67  continue;
68  }
69  }
70  return hostsSeen.size();
71 } // end parseOtsTraceLevels()
72 
73 const ots::ITRACEController::HostTraceLevelMap&
75 {
76  __COUT__ << "getTraceLevels() BEGIN" << __E__;
77 
78  traceLevelsMap_.clear();
79 
80  ots::ITRACEController::addTraceLevelsForThisHost();
81 
82  // If setTraceLevelMask() just ran and cached the updated host's levels
83  // (within the last 5 seconds), use that cache instead of a full ots -tt
84  // readback. The set leaf already includes a level dump, so this avoids
85  // a redundant SSH fan-out to all hosts.
86  time_t now = time(nullptr);
87  if(!lastSetLevels_.empty() && (now - lastSetTime_) < 5)
88  {
89  __COUT__ << "Using cached set-response levels (" << lastSetLevels_.size()
90  << " host key(s), age " << (now - lastSetTime_) << "s)." << __E__;
91  for(const auto& hostEntry : lastSetLevels_)
92  traceLevelsMap_[hostEntry.first] = hostEntry.second;
93  lastSetLevels_.clear();
94  }
95  else
96  {
97  lastSetLevels_.clear();
98 
99  std::string cmd = "ots -tt";
100  __COUT__ << "Primary TRACE path: " << cmd << __E__;
101 
102  std::string out;
103  try
104  {
105  out = StringMacros::exec(cmd.c_str());
106  }
107  catch(const std::exception& e)
108  {
109  __COUT_ERR__ << "'ots -tt' failed: " << e.what() << __E__;
110  }
111  catch(...)
112  {
113  __COUT_ERR__ << "'ots -tt' failed (unknown exception)." << __E__;
114  }
115 
116  size_t artdaqHostCount = parseOtsTraceLevels(out, traceLevelsMap_);
117  __COUT__ << "'ots -tt' populated " << artdaqHostCount << " artdaq host(s)."
118  << __E__;
119  }
120 
121  // Merge duplicate host keys that differ only by domain suffix (e.g. "mu2e-calo-01"
122  // from gethostname() vs "mu2e-calo-01.fnal.gov" from ots -tt). Merge the longer
123  // key's labels into the shorter key, then remove the longer key. Only merges when
124  // the short name matches exactly up to a '.' boundary — different base names or
125  // domains are never merged.
126  {
127  std::vector<std::string> keysToRemove;
128  for(auto& entry : traceLevelsMap_)
129  {
130  const std::string& key = entry.first;
131  auto dotPos = key.find('.');
132  if(dotPos == std::string::npos)
133  continue; // no domain — can't be the long form
134  std::string shortKey = key.substr(0, dotPos);
135  auto it = traceLevelsMap_.find(shortKey);
136  if(it != traceLevelsMap_.end() && it->first != key)
137  {
138  // Merge: copy labels from FQDN key into short key (short key wins on collision)
139  for(const auto& label : entry.second)
140  it->second.emplace(label.first, label.second);
141  keysToRemove.push_back(key);
142  }
143  }
144  for(const auto& k : keysToRemove)
145  traceLevelsMap_.erase(k);
146  }
147 
148  __COUT__ << "getTraceLevels() END -- traceLevelsMap_ has " << traceLevelsMap_.size()
149  << " host key(s):" << __E__;
150  for(const auto& host : traceLevelsMap_)
151  __COUT__ << " host key '" << host.first << "' with " << host.second.size()
152  << " label(s)." << __E__;
153 
154  return traceLevelsMap_;
155 } // end getTraceLevels()
156 
158  const std::string& label,
159  TraceMasks const& lvl,
160  const std::string& host /*=localhost*/,
161  std::string const& mode /*= "ALL"*/)
162 {
163  bool allMode = mode == "ALL";
164 
165  // Determine if the target host is local by comparing stripped short hostnames.
166  auto stripHost = [](const std::string& h) -> std::string {
167  std::string s = h;
168  auto pos = s.find("-data");
169  if(pos != std::string::npos)
170  s.erase(pos, 5);
171  pos = s.find("-ipmi");
172  if(pos != std::string::npos)
173  s.erase(pos, 5);
174  pos = s.find('.');
175  if(pos != std::string::npos)
176  s = s.substr(0, pos);
177  return s;
178  };
179 
180  std::string localShort = stripHost(getHostnameString());
181  std::string targetShort = stripHost(host);
182  bool isLocal = (host == "localhost" || targetShort == localShort);
183 
184  if(isLocal)
185  {
186  ots::ITRACEController::setTraceLevelsForThisHost(label, lvl, mode);
187  return;
188  }
189 
190  // Remote host: use ots -ttlvl* (parse format) via SSH
191  std::string cmd;
192  if(allMode)
193  cmd = "ots -ttlvlmsk '" + host + "' '" + label + "' " + std::to_string(lvl.M) +
194  " " + std::to_string(lvl.S) + " " + std::to_string(lvl.T);
195  else if(mode == "FAST")
196  cmd = "ots -ttlvlM '" + host + "' '" + label + "' " + std::to_string(lvl.M);
197  else if(mode == "SLOW")
198  cmd = "ots -ttlvlS '" + host + "' '" + label + "' " + std::to_string(lvl.S);
199  else if(mode == "TRIGGER")
200  cmd = "ots -ttlvlT '" + host + "' '" + label + "' " + std::to_string(lvl.T);
201 
202  if(!cmd.empty())
203  {
204  __COUT__ << "Remote TRACE set: " << cmd << __E__;
205  std::string out;
206  try
207  {
208  out = StringMacros::exec(cmd.c_str());
209  }
210  catch(...)
211  {
212  out = "";
213  }
214 
215  if(out.find("#OTSTRACE-OK") != std::string::npos)
216  {
217  lastSetLevels_.clear();
218  parseOtsTraceLevels(out, lastSetLevels_);
219  lastSetTime_ = time(nullptr);
220  __COUT__ << "Set confirmed; cached " << lastSetLevels_.size()
221  << " host(s) from response." << __E__;
222  return;
223  }
224  __COUT_ERR__ << "'ots' TRACE set did not confirm (#OTSTRACE-OK missing)."
225  << __E__;
226  }
227 } // end setTraceLevelMask()
const ITRACEController::HostTraceLevelMap & getTraceLevels(void) final
pure virtual
virtual void setTraceLevelMask(std::string const &label, TraceMasks const &lvl, std::string const &hostname="localhost", std::string const &mode="ALL") final
pure virtual
static std::string exec(const char *cmd)