Line data Source code
1 : ////////////////////////////////////////////////////////////////////////
2 : // Class: FragmentWatcher
3 : // Module Type: analyzer
4 : // File: FragmentWatcher_module.cc
5 : // Description: Collects and reports statistics on missing and empty fragments
6 : //
7 : // The model that is followed here is to publish to the metrics system
8 : // the full history of what has happened so far. In that way, each update
9 : // is self-contained. So, the map of fragment IDs that have missing or
10 : // empty fragments will contain the total number of events in which each
11 : // fragment ID was missing or empty.
12 : //
13 : // TRACE messages, though, contain a mix of per-event and overall results.
14 : // To enable TLVL_TRACE messages that have overall results (for debugging),
15 : // use 'tonM -n <appname>_FragmentWatcher 4'.
16 : ////////////////////////////////////////////////////////////////////////
17 :
18 : #include "TRACE/tracemf.h"
19 : #include "artdaq/DAQdata/Globals.hh"
20 : #define TRACE_NAME (app_name + "_FragmentWatcher").c_str()
21 :
22 : #include "artdaq-core/Data/ContainerFragment.hh"
23 : #include "artdaq-core/Data/Fragment.hh"
24 :
25 : #include "art/Framework/Core/EDAnalyzer.h"
26 : #include "art/Framework/Core/ModuleMacros.h"
27 : #include "art/Framework/Principal/Event.h"
28 : #include "art/Framework/Principal/Handle.h"
29 :
#include <bitset>
#include <iostream>
#include <map>
#include <set>
#include <sstream>
#include <string>
#include <vector>
33 :
34 : #define TLVL_BAD_FRAGMENTS TLVL_WARNING
35 : #define TLVL_EVENT_SUMMARY TLVL_TRACE
36 : #define TLVL_EXPECTED_FRAGIDS 5
37 : #define TLVL_BASIC_MODE 6
38 : #define TLVL_FRACTIONAL_MODE 7
39 :
40 : namespace artdaq {
41 : class FragmentWatcher;
42 : }
43 :
44 : /// <summary>
45 : /// An art::EDAnalyzer module which checks events for certain error conditions (missing fragments, empty fragments, etc)
46 : /// </summary>
47 : class artdaq::FragmentWatcher : public art::EDAnalyzer
48 : {
49 : public:
50 : /**
51 : * \brief FragmentWatcher Constructor
52 : * \param pset ParameterSet used to configure FragmentWatcher
53 : *
54 : * FragmentWatcher accepts the following Parameters:
55 : * mode_bitmask (default: 0x1): Mask of modes to use. BASIC_COUNTS_MODE = 0, FRACTIONAL_COUNTS_MODE = 1, DETAILED_COUNTS_MODE = 2
56 : * metrics_reporting_level (default: 1): Level to use for metrics reporting
57 : * metrics: A artdaq::MetricManager::Config ParameterSet used to configure MetricManager reporting for this module
58 : */
59 : explicit FragmentWatcher(fhicl::ParameterSet const& pset);
60 : /**
61 : * \brief Virtual Destructor. Shuts down MetricManager if one is present
62 : */
63 : ~FragmentWatcher() override;
64 :
65 : /**
66 : * \brief Analyze each event, using the configured mode bitmask
67 : * \param evt art::Event to analyze
68 : */
69 : void analyze(art::Event const& evt) override;
70 :
71 : private:
72 : FragmentWatcher(FragmentWatcher const&) = delete;
73 : FragmentWatcher(FragmentWatcher&&) = delete;
74 : FragmentWatcher& operator=(FragmentWatcher const&) = delete;
75 : FragmentWatcher& operator=(FragmentWatcher&&) = delete;
76 :
77 : std::bitset<3> mode_bitset_;
78 : int metrics_reporting_level_;
79 :
80 : int events_processed_;
81 : std::set<int> expected_fragmentID_list_;
82 :
83 : int events_with_missing_fragments_;
84 : int events_with_empty_fragments_;
85 :
86 : int events_with_10pct_missing_fragments_;
87 : int events_with_10pct_empty_fragments_;
88 : int events_with_50pct_missing_fragments_;
89 : int events_with_50pct_empty_fragments_;
90 :
91 : std::map<int, int> missing_fragments_by_fragmentID_;
92 : std::map<int, int> empty_fragments_by_fragmentID_;
93 :
94 : const int BASIC_COUNTS_MODE = 0;
95 : const int FRACTIONAL_COUNTS_MODE = 1;
96 : const int DETAILED_COUNTS_MODE = 2;
97 : };
98 :
99 0 : artdaq::FragmentWatcher::FragmentWatcher(fhicl::ParameterSet const& pset)
100 : : EDAnalyzer(pset)
101 0 : , mode_bitset_(std::bitset<3>(pset.get<int>("mode_bitmask", 0x1)))
102 0 : , metrics_reporting_level_(pset.get<int>("metrics_reporting_level", 1))
103 0 : , events_processed_(0)
104 0 : , expected_fragmentID_list_()
105 0 : , events_with_missing_fragments_(0)
106 0 : , events_with_empty_fragments_(0)
107 0 : , events_with_10pct_missing_fragments_(0)
108 0 : , events_with_10pct_empty_fragments_(0)
109 0 : , events_with_50pct_missing_fragments_(0)
110 0 : , events_with_50pct_empty_fragments_(0)
111 0 : , missing_fragments_by_fragmentID_()
112 0 : , empty_fragments_by_fragmentID_()
113 : {
114 0 : auto ids = pset.get<std::vector<int>>("fragment_ids", {});
115 0 : for (auto& id : ids)
116 : {
117 0 : expected_fragmentID_list_.insert(id);
118 : }
119 0 : }
120 :
121 0 : artdaq::FragmentWatcher::~FragmentWatcher()
122 : {
123 0 : }
124 :
125 0 : void artdaq::FragmentWatcher::analyze(art::Event const& evt)
126 : {
127 0 : events_processed_++;
128 :
129 : // get all the artdaq fragment collections in the event.
130 0 : std::vector<art::Handle<std::vector<artdaq::Fragment>>> fragmentHandles;
131 0 : fragmentHandles = evt.getMany<std::vector<artdaq::Fragment>>();
132 :
133 0 : std::set<int> missing_fragmentID_list_this_event(expected_fragmentID_list_);
134 : // Check for missing Fragment IDs, updating the master list as necessary
135 0 : for (auto const& hndl : fragmentHandles)
136 : {
137 0 : for (auto const& fragment : *hndl)
138 : {
139 0 : int fragID = fragment.fragmentID();
140 0 : if (!expected_fragmentID_list_.count(fragID))
141 : {
142 0 : TLOG(TLVL_EXPECTED_FRAGIDS) << "Inserting fragment ID " << fragID << " into the list of expected_fragmentIDs.";
143 0 : expected_fragmentID_list_.insert(fragID);
144 : }
145 0 : missing_fragmentID_list_this_event.erase(fragID);
146 : }
147 : }
148 :
149 : // track the number of missing fragments by fragment ID
150 0 : for (int const& fragID : missing_fragmentID_list_this_event)
151 : {
152 0 : if (missing_fragments_by_fragmentID_.count(fragID) == 0)
153 : {
154 0 : missing_fragments_by_fragmentID_[fragID] = 1;
155 : }
156 : else
157 : {
158 0 : missing_fragments_by_fragmentID_[fragID] += 1;
159 : }
160 : }
161 :
162 : // check if this event has any Empty fragments
163 0 : int empty_fragment_count_this_event = 0;
164 0 : std::set<int> empty_fragmentID_list_this_event;
165 0 : for (auto const& hndl : fragmentHandles)
166 : {
167 0 : std::string instance_name = hndl.provenance()->productInstanceName();
168 0 : std::size_t found = instance_name.find("Empty");
169 0 : if (found != std::string::npos)
170 : {
171 0 : empty_fragment_count_this_event += hndl->size();
172 :
173 : // track the number of empty fragments by fragment ID
174 0 : for (auto const& fragment : *hndl)
175 : {
176 0 : int fragID = fragment.fragmentID();
177 0 : if (empty_fragments_by_fragmentID_.count(fragID) == 0)
178 : {
179 0 : empty_fragments_by_fragmentID_[fragID] = 1;
180 : }
181 : else
182 : {
183 0 : empty_fragments_by_fragmentID_[fragID] += 1;
184 : }
185 0 : empty_fragmentID_list_this_event.insert(fragID);
186 : }
187 : }
188 0 : }
189 :
190 : // common metric reporting for multiple modes
191 0 : if (metricMan != nullptr && (mode_bitset_.test(BASIC_COUNTS_MODE) || mode_bitset_.test(FRACTIONAL_COUNTS_MODE)))
192 : {
193 0 : metricMan->sendMetric("EventsProcessed", events_processed_, "events", metrics_reporting_level_,
194 : artdaq::MetricMode::LastPoint);
195 : }
196 :
197 0 : size_t missing_fragment_count_this_event = missing_fragmentID_list_this_event.size();
198 0 : size_t total_fragments_this_event = expected_fragmentID_list_.size() - missing_fragment_count_this_event;
199 0 : TLOG(TLVL_EVENT_SUMMARY) << "Event " << evt.event() << ": this event: total_fragments=" << total_fragments_this_event
200 0 : << ", missing_fragments=" << missing_fragment_count_this_event << ", empty_fragments="
201 0 : << empty_fragment_count_this_event << " (" << events_processed_ << " events processed)";
202 : // log TRACE message if there are missing fragments
203 0 : if (missing_fragment_count_this_event > 0)
204 : {
205 0 : std::ostringstream oss;
206 0 : bool firstLoop = true;
207 0 : for (auto const& fragID : missing_fragmentID_list_this_event)
208 : {
209 0 : if (!firstLoop) { oss << ", "; }
210 0 : oss << fragID;
211 0 : firstLoop = false;
212 : }
213 0 : TLOG(TLVL_BAD_FRAGMENTS) << "Event " << evt.event() << ": total_fragments=" << total_fragments_this_event
214 0 : << ", fragmentIDs for " << missing_fragment_count_this_event << " missing_fragments: " << oss.str();
215 0 : }
216 : // log TRACE message if there are empty fragments
217 0 : if (!empty_fragmentID_list_this_event.empty())
218 : {
219 0 : std::ostringstream oss;
220 0 : bool firstLoop = true;
221 0 : for (auto const& fragID : empty_fragmentID_list_this_event)
222 : {
223 0 : if (!firstLoop) { oss << ", "; }
224 0 : oss << fragID;
225 0 : firstLoop = false;
226 : }
227 0 : TLOG(TLVL_BAD_FRAGMENTS) << "Event " << evt.event() << ": total_fragments=" << total_fragments_this_event
228 0 : << ", fragmentIDs for " << empty_fragment_count_this_event << " empty_fragments: " << oss.str();
229 0 : }
230 :
231 : // reporting for the BASIC_COUNTS_MODE
232 0 : if (metricMan != nullptr && mode_bitset_.test(BASIC_COUNTS_MODE))
233 : {
234 0 : if (missing_fragment_count_this_event > 0) { ++events_with_missing_fragments_; }
235 0 : if (empty_fragment_count_this_event > 0) { ++events_with_empty_fragments_; }
236 :
237 0 : metricMan->sendMetric("EventsWithMissingFragments", events_with_missing_fragments_, "events",
238 : metrics_reporting_level_, artdaq::MetricMode::LastPoint);
239 0 : metricMan->sendMetric("EventsWithEmptyFragments", events_with_empty_fragments_, "events",
240 : metrics_reporting_level_, artdaq::MetricMode::LastPoint);
241 :
242 0 : TLOG(TLVL_BASIC_MODE) << "Event " << evt.event() << ": events_with_missing_fragments=" << events_with_missing_fragments_
243 0 : << ", events_with_empty_fragments=" << events_with_empty_fragments_;
244 : }
245 :
246 : // reporting for the FRACTIONAL_COUNTS_MODE
247 0 : if (metricMan != nullptr && mode_bitset_.test(FRACTIONAL_COUNTS_MODE))
248 : {
249 0 : if (((static_cast<double>(missing_fragment_count_this_event) * 100.0) / static_cast<double>(expected_fragmentID_list_.size())) >= 10.0)
250 : {
251 0 : ++events_with_10pct_missing_fragments_;
252 : }
253 0 : if (((static_cast<double>(missing_fragment_count_this_event) * 100.0) / static_cast<double>(expected_fragmentID_list_.size())) >= 50.0)
254 : {
255 0 : ++events_with_50pct_missing_fragments_;
256 : }
257 :
258 0 : if (((static_cast<double>(empty_fragment_count_this_event) * 100.0) / static_cast<double>(expected_fragmentID_list_.size())) >= 10.0)
259 : {
260 0 : ++events_with_10pct_empty_fragments_;
261 : }
262 0 : if (((static_cast<double>(empty_fragment_count_this_event) * 100.0) / static_cast<double>(expected_fragmentID_list_.size())) >= 50.0)
263 : {
264 0 : ++events_with_50pct_empty_fragments_;
265 : }
266 :
267 0 : metricMan->sendMetric("EventsWith10PctMissingFragments", events_with_10pct_missing_fragments_, "events",
268 : metrics_reporting_level_, artdaq::MetricMode::LastPoint);
269 0 : metricMan->sendMetric("EventsWith50PctMissingFragments", events_with_50pct_missing_fragments_, "events",
270 : metrics_reporting_level_, artdaq::MetricMode::LastPoint);
271 :
272 0 : metricMan->sendMetric("EventsWith10PctEmptyFragments", events_with_10pct_empty_fragments_, "events",
273 : metrics_reporting_level_, artdaq::MetricMode::LastPoint);
274 0 : metricMan->sendMetric("EventsWith50PctEmptyFragments", events_with_50pct_empty_fragments_, "events",
275 : metrics_reporting_level_, artdaq::MetricMode::LastPoint);
276 :
277 0 : TLOG(TLVL_FRACTIONAL_MODE) << "Event " << evt.event() << ": events_with_10pct_missing_fragments=" << events_with_10pct_missing_fragments_
278 0 : << ", events_with_10pct_empty_fragments=" << events_with_10pct_empty_fragments_;
279 0 : TLOG(TLVL_FRACTIONAL_MODE) << "Event " << evt.event() << ": events_with_50pct_missing_fragments=" << events_with_50pct_missing_fragments_
280 0 : << ", events_with_50pct_empty_fragments=" << events_with_50pct_empty_fragments_;
281 : }
282 :
283 : // reporting for the DETAILED_COUNTS_MODE
284 0 : if (metricMan != nullptr && mode_bitset_.test(DETAILED_COUNTS_MODE))
285 : {
286 : // only send an update when the missing or empty fragment counts, by FragmentID, changed,
287 : // as indicated by a non-zero number of missing or empty fragments in this event
288 0 : if (missing_fragment_count_this_event > 0 || empty_fragment_count_this_event > 0)
289 : {
290 0 : std::ostringstream oss;
291 0 : oss << "<eventbuilder_snapshot app_name=\"" << app_name << "\"><events_processed>" << events_processed_
292 0 : << "</events_processed>";
293 0 : oss << "<missing_fragment_counts>";
294 0 : for (auto const& mapIter : missing_fragments_by_fragmentID_)
295 : {
296 0 : oss << "<count fragment_id=" << mapIter.first << ">" << mapIter.second << "</count>";
297 : }
298 0 : oss << "</missing_fragment_counts>";
299 0 : oss << "<empty_fragment_counts>";
300 0 : for (auto const& mapIter : empty_fragments_by_fragmentID_)
301 : {
302 0 : oss << "<count fragment_id=" << mapIter.first << ">" << mapIter.second << "</count>";
303 : }
304 0 : oss << "</empty_fragment_counts>";
305 0 : oss << "</eventbuilder_snapshot>";
306 :
307 0 : metricMan->sendMetric("EmptyFragmentSnapshot", oss.str(), "xml_string",
308 : metrics_reporting_level_, artdaq::MetricMode::LastPoint);
309 0 : }
310 : }
311 :
312 : #if 0
313 : ==================================================== =
314 :
315 : event_builder_snapshot : {
316 : name: "EventBuilder5"
317 : timestamp : "20190408T124433"
318 : events_built : 105
319 :
320 : sender_list : ["felix501", "felix501", "ssp101", "ssp102"]
321 : valid_fragment_counts : [105, 105, 102, 104]
322 : empty_fragment_counts : [0, 0, 2, 0]
323 : missing_fragment_counts : [0, 0, 1, 1]
324 : }
325 :
326 : ==================================================== =
327 :
328 : <event_builder_snapshot name = "EventBuilder5">
329 : < timestamp>20190408T124433< / timestamp>
330 : < events_built>105 < / events_built
331 :
332 : <sender_list>
333 : <sender index = 0>felix501< / sender>
334 : <sender index = 1>felix502< / sender>
335 : <sender index = 2>ssp101< / sender>
336 : <sender index = 3>ssp102< / sender>
337 : < / sender_list>
338 :
339 : <valid_fragment_counts>
340 : < count index = 0>105 < / count >
341 : < count index = 1>105 < / count >
342 : < count index = 2>102 < / count >
343 : < count index = 3>104 < / count >
344 : < / valid_fragment_counts>
345 :
346 : <empty_fragment_counts>
347 : < count index = 2>2 < / count >
348 : < / empty_fragment_counts>
349 :
350 : <missing_fragment_counts>
351 : < count index = 2>1 < / count >
352 : < count index = 3>1 < / count >
353 : < / missing_fragment_counts>
354 : < / event_builder_snapshot>
355 :
356 : ==================================================== =
357 : #endif
358 0 : }
359 :
360 0 : DEFINE_ART_MODULE(artdaq::FragmentWatcher) // NOLINT(performance-unnecessary-value-param)
|