Hall A ROOT/C++ Analyzer (podd)
Loading...
Searching...
No Matches
MultiFileRun.h
Go to the documentation of this file.
1#ifndef Podd_MultiFileRun_h_
2#define Podd_MultiFileRun_h_
5//
6// Podd::MultiFileRun
7//
8// This class presents multiple CODA input files as if they were a
9// single file through the THaRun API. It supports files split into
10// consecutive "segments", files written in parallel by CODA in
11// multi-stream mode, and a combination of both. Both CODA 2 and CODA 3
12// file formats are supported, as with THaRun.
13//
14// For convenience, wildcard and regular expression file names are
15// supported. File directory paths can be either part of the file name,
16// with support for wildcards/regexp as well, or be specified in a vector
17// of strings (again with wildcard/regexp support). Filenames and paths may
18// contain environment variables, which will be expanded if
19// defined. (Undefined environment variables generate an error.)
20//
21// The file name convention is <name>.<stream_index>.<segment_index> or
22// <name>.<segment_index>. If the stream index is missing, a single stream
23// is assumed. These patterns can be changed by overriding certain virtual
24// methods.
25//
26// Examples:
27//
28// #include "MultiFileRun.h"
29//
30// auto run =
31// make_unique<Podd::MultiFileRun>("/daq/data[1-4]/e1234_evio.?.[0-9]+",
32// "Run 1234 all files");
33//
34// vector<string> paths{"/daq/data[1-4]", "/cache/hallx/raw", "$CACHE/copies"};
35// auto run =
36// make_unique<Podd::MultiFileRun>(paths, "e1234_evio.*.*",
37// "Run 1234 from daq and cache all files");
38//
39// run->Init();
40// run->Print();
41//
42// With the powerful wildcard/regexp matching capability, one has to take
43// care that files from different runs are not mixed accidentally. The
44// class will refuse to process multiple files whose stream and segment indices
45// (or only the segment index, for a single stream) are identical. Additionally, a
46// warning will be printed if the file name stem (<name> in the pattern
47// above) is not identical for all files. Also, different CODA formats
48// cannot be mixed.
49//
50// File segments are read sequentially. File streams are read in parallel,
51// where data from the stream with the lowest current physics event number
52// will be presented first. With the usual CODA round-robin write strategy,
53// this will normally yield consecutive event numbers on consecutive calls
54// to ReadEvent().
55//
56// Special events (e.g. slow controls, scalers) may not be delivered in the
57// exact order in which they were written. This behavior may be fine-tuned
58// in a future release.
59//
60// The current input file, segment number, and stream number can be
61// obtained from GetFilename(), GetSegment(), and GetStream(),
62// respectively, after each call to ReadEvent().
63//
64// Initialization info is retrieved from segment 0, as with THaRun. If this
65// segment is not part of the input file list, the code will attempt to
66// locate it in the current run's directory and in any directories in the
67// search path list combined with any directory components in the file list.
68//
69// Currently, it is assumed that segment 0 of any stream provides init info.
70// If only stream 0 provides it, for example, it will be necessary to override
71// THaRun::ProvidesInitInfo() and THaRun::GetInitInfoFileName() to reflect that.
72// Details naturally depend on the experiment-specific DAQ configuration.
73//
74// Like THaRun, this class can be persisted through ROOT I/O. This will
75// save, among other data, the paths and stream/segment info of all input
76// files matched at initialization time.
77//
79
80#include "THaRun.h"
81#include "THaCodaData.h"
82#include <vector>
83#include <string>
84#include <memory>
85#include <utility>
86#include <functional> // std::function
87
88class TRegexp;
89
90namespace Podd {
91
// Podd::MultiFileRun
//
// THaRun subclass that presents multiple CODA input files (consecutive
// segments and/or parallel streams) as if they were a single file.
//
// NOTE(review): this listing has gaps in its line numbering (e.g. 152, 155,
// 189-191, 193-194, 216-219, 226, 242). Some members referenced below
// (e.g. fNevRead) therefore have no visible declaration here.
92class MultiFileRun : public THaRun {
93public:
 // Construct from a single file name pattern.
 // is_regex: presumably selects TRegexp (true) vs. wildcard (false)
 // interpretation of the names, cf. fNameIsRegexp — confirm in the .cxx.
94 explicit MultiFileRun( const char* filenamePattern = "",
95 const char* description = "",
96 bool is_regex = false );
 // Construct from a list of search paths plus a file name pattern.
97 explicit MultiFileRun( std::vector<std::string> pathList,
98 const char* filenamePattern = "",
99 const char* description = "",
100 bool is_regex = false );
 // Construct from a list of search paths plus a list of file names.
101 explicit MultiFileRun( std::vector<std::string> pathList,
102 std::vector<std::string> fileList,
103 const char* description = "",
104 bool is_regex = false );
 // Copy constructor. Assignment takes a THaRunBase reference, not a
 // MultiFileRun reference.
105 MultiFileRun( const MultiFileRun& run );
106 MultiFileRun& operator=( const THaRunBase& rhs );
107
 // Virtual run interface.
 // NOTE(review): presumably these override THaRun/TObject methods; the base
 // class declarations are not visible in this file.
108 virtual void Clear( Option_t* opt="" );
109 virtual Int_t Close();
110 virtual Int_t Compare( const TObject* obj ) const;
111 virtual Int_t GetDataVersion();
112 virtual const UInt_t* GetEvBuffer() const;
113 // Get the last-seen physics event number. For CODA 3 in block mode, this
114 // is the event number of the first event in the block.
115 virtual UInt_t GetEvNum() const;
116 virtual Bool_t IsOpen() const;
117 virtual Int_t Open();
118 virtual void Print( Option_t* opt="" ) const;
119 virtual Int_t ReadEvent();
120 virtual Int_t SetFilename( const char* name );
 // Set the list of file names / search paths. The meaning of the bool
 // return value is not visible in this header.
121 bool SetFileList( std::vector<std::string> filelist );
122 bool SetPathList( std::vector<std::string> pathlist );
 // Replace / retrieve the complete flag word fFlags (see EFlags)
123 void SetFlags( UInt_t set ) { fFlags = set; }
124 UInt_t GetFlags() const { return fFlags; }
125
126 // These getters will return valid data after Init()
127 // Number of input files found in all streams
128 UInt_t GetNFiles() const;
129 // Number of streams found
130 UInt_t GetNStreams() const;
131 // Lowest segment number found in any stream (>= fFirstSegment)
132 Int_t GetStartSegment() const;
133 // Highest segment number found in any stream
134 Int_t GetLastSegment() const;
135 // Lowest stream number found (-1 = no stream index found)
136 Int_t GetStartStream() const;
137 // Highest stream number found (-1 = no stream index found)
138 Int_t GetLastStream() const;
139 // Complete list of input files found
140 std::vector<std::string> GetFiles() const;
141 // Number of successful calls to ReadEvent()
142 UInt_t GetNevRead() const { return fNevRead; }
143
144 // Configuration of segment/stream ranges. If called, must Init() again
145 void SetFirstSegment( Int_t n );
146 void SetFirstStream( Int_t n );
147 void SetMaxSegments( Int_t n );
148 void SetMaxStreams( Int_t n );
149
150 // True if filename pattern is to be interpreted as a full ROOT TRegexp,
151 // false if interpreted as a wildcard expression
 // NOTE(review): the declaration this comment describes (line 152) is not
 // shown in this listing — presumably IsNameRegexp(); confirm.
153
 // Descriptor of one input file: full path, file name stem and segment
 // number. operator< orders by segment number only. The struct is declared
 // with 64-byte alignment.
154 struct FileInfo {
156 FileInfo( std::string path, std::string stem, Int_t seg );
157 bool operator< ( const FileInfo& rhs ) const {
158 return fSegment < rhs.fSegment;
159 }
160 std::string fPath; // Full file path
161 std::string fStem; // File basename stem (without stream/segment no)
162 Int_t fSegment; // File segment number
163 ClassDefNV(FileInfo, 1) // CODA file descriptor for MultiFileRun
164 } __attribute__((aligned(64)));
165
 // Descriptor of one event stream: the CODA data object, the list of files
 // belonging to the stream, the stream ID and the CODA version.
 // Equality and ordering consider only (fID, fVersion), with fID compared
 // first. The struct is declared with 64-byte alignment.
166 struct StreamInfo {
167 StreamInfo();
168 explicit StreamInfo( Int_t id );
 // User-declared copy operations: the implicit ones would be deleted
 // because of the std::unique_ptr member fCodaData.
169 StreamInfo( const StreamInfo& rhs );
170 StreamInfo& operator=( const StreamInfo& rhs );
171 bool operator==( const StreamInfo& rhs ) const {
172 return fID == rhs.fID && fVersion == rhs.fVersion;
173 }
174 bool operator<( const StreamInfo& rhs ) const {
175 if( fID != rhs.fID ) return fID < rhs.fID;
176 return fVersion < rhs.fVersion;
177 }
178 Int_t Open();
179 Int_t Read();
180 Int_t Close();
181 Bool_t IsGood() const;
182 const UInt_t* GetEvBuffer() const;
183 const std::string& GetFilename() const;
184
185 std::unique_ptr<Decoder::THaCodaData> fCodaData;
186 std::vector<FileInfo> fFiles;
187 Int_t fID; // Stream ID (-1 = default/none)
188 Int_t fVersion; // CODA version
 // NOTE(review): lines 189-191 and 193-194 are not shown in this listing;
 // further data members / private helpers presumably live there.
192 private:
195 ClassDefNV(StreamInfo, 1) // CODA stream descriptor for MultiFileRun
196 } __attribute__((aligned(64)));
197
198 //TODO: not yet implemented, may change
 // NOTE(review): the enumerators take the values 0, 1, 2. Presumably they
 // are bit positions within fFlags rather than bit masks — confirm once
 // the flags are implemented.
199 enum EFlags {
200 kRequireAllSegments, // Require all segments in range present
201 kRequireAllFiles, // Require all non-wildcard files present
202 kDoNotSkipDupFileNames // Keep searching even if file already found
203 };
204
205protected:
206 // Configuration
207 std::vector<std::string> fFileList; // File names/patterns (wildcard or regexp)
208 std::vector<std::string> fPathList; // List of search paths
209 std::vector<StreamInfo> fStreams; // Event streams to process
210 Int_t fFirstSegment; // First segment number to process
211 Int_t fFirstStream; // First stream number to process
212 UInt_t fMaxSegments; // Maximum number of segments to process
213 UInt_t fMaxStreams; // Maximum number of streams to process
214 UInt_t fFlags; // Flags (see EFlags)
215 Bool_t fNameIsRegexp; // Interpret path/file names as TRegexp
216 // Working data
 // NOTE(review): the working-data members (lines 217-219) are not shown in
 // this listing.
220
 // Virtual hooks. The file header comment states that the file naming
 // patterns can be changed by overriding certain virtual methods;
 // presumably these are the ones meant — confirm.
221 virtual Int_t BuildInputList();
222 virtual Bool_t FindSegmentNumber();
223 virtual Int_t FindNextStream() const;
224 virtual TString FindInitInfoFile( const TString& fname );
225
 // Non-virtual helpers available to derived classes
227 void ClearStreams();
228 void ExpandFileName( std::string& str ) const;
229 bool HasWildcards( const TString& str ) const;
230 void PrintStreamInfo() const;
231 void PrintFileInfo() const;
232
233private:
 // path_t: pair of TStrings. NOTE(review): presumably (directory, file
 // name) — confirm against the implementation.
234 using path_t = std::pair<TString, TString>;
 // action_t: callback taking two TStrings and returning Int_t. AddFile()
 // below has a matching parameter list.
235 using action_t = std::function<Int_t( const TString&, const TString& )>;
236 Int_t AddFile( const TString& file, const TString& dir );
237 void AssembleFilePaths( std::vector<path_t>& candidates );
238 void AssembleFilePaths( std::vector<path_t>& candidates,
239 const std::vector<std::string>& file_list );
 // Directory scanning helpers; each receives the action_t callback.
240 Int_t BuildInputListFromWildcardDir( const path_t& path, const action_t& action );
241 Int_t BuildInputListFromTopDir( const path_t& path, const action_t& action );
243 Int_t DescendInto( const TString& curpath, const std::vector<TString>& splitpath,
244 Int_t level, const action_t& action );
245 Int_t ForEachMatchItemInDir( const TString& dir, const TRegexp& match_re,
246 const action_t& action );
247 Int_t ScanForFilename( const path_t& path, bool regex_mode, const action_t& action );
248 Int_t ScanForSubdirs( const TString& curdir, const std::vector<TString>& splitpath,
249 Int_t level, bool regex_mode, const action_t& action );
250 void SortStreams();
251
 // Private status bit, defined via BIT(18)
252 enum { kResolvingWildcard = BIT(18) };
253
 // ClassDef macro with version id 2
254 ClassDef(MultiFileRun, 2) // CODA data from multiple files
255};
256
257} //namespace Podd
258
259#endif //Podd_MultiFileRun_h_
int Int_t
unsigned int UInt_t
bool Bool_t
const char Option_t
#define ClassDef(name, id)
#define ClassDefNV(name, id)
#define BIT(n)
char name[80]
virtual Bool_t IsOpen() const
void AssembleFilePaths(std::vector< path_t > &candidates)
Int_t ForEachMatchItemInDir(const TString &dir, const TRegexp &match_re, const action_t &action)
virtual Int_t Close()
std::pair< TString, TString > path_t
void PrintStreamInfo() const
virtual Int_t FindNextStream() const
Int_t GetLastSegment() const
virtual UInt_t GetEvNum() const
bool SetPathList(std::vector< std::string > pathlist)
UInt_t GetNStreams() const
void SetFirstSegment(Int_t n)
virtual void Clear(Option_t *opt="")
Bool_t IsNameRegexp() const
void SetMaxStreams(Int_t n)
std::vector< std::string > GetFiles() const
UInt_t fNevRead
Number of active streams.
UInt_t GetNevRead() const
void SetFlags(UInt_t set)
virtual Int_t BuildInputList()
Number of events read.
virtual Int_t ReadEvent()
virtual Int_t Open()
enum Podd::MultiFileRun::EFlags __attribute__
void PrintFileInfo() const
Int_t ScanForFilename(const path_t &path, bool regex_mode, const action_t &action)
Int_t BuildInputListFromWildcardDir(const path_t &path, const action_t &action)
std::vector< std::string > fFileList
void SetMaxSegments(Int_t n)
virtual void Print(Option_t *opt="") const
virtual Int_t GetDataVersion()
std::vector< StreamInfo > fStreams
std::function< Int_t(const TString &, const TString &)> action_t
bool SetFileList(std::vector< std::string > filelist)
Int_t ScanForSubdirs(const TString &curdir, const std::vector< TString > &splitpath, Int_t level, bool regex_mode, const action_t &action)
Int_t AddFile(const TString &file, const TString &dir)
Int_t GetLastStream() const
Int_t GetStartStream() const
Int_t BuildInputListFromTopDir(const path_t &path, const action_t &action)
bool HasWildcards(const TString &str) const
virtual Int_t SetFilename(const char *name)
virtual const UInt_t * GetEvBuffer() const
std::vector< std::string > fPathList
UInt_t GetFlags() const
Int_t GetStartSegment() const
virtual Bool_t FindSegmentNumber()
void SetFirstStream(Int_t n)
virtual Int_t Compare(const TObject *obj) const
Int_t DescendInto(const TString &curpath, const std::vector< TString > &splitpath, Int_t level, const action_t &action)
MultiFileRun & operator=(const THaRunBase &rhs)
UInt_t GetNFiles() const
void ExpandFileName(std::string &str) const
Int_t fNActive
Index of last stream that was read.
virtual TString FindInitInfoFile(const TString &fname)
bool operator<(const FileInfo &rhs) const
const std::string & GetFilename() const
Bool_t fActive
Number of most recent physics event.
Int_t OpenCurrent()
Stream has not yet reached EOF.
StreamInfo & operator=(const StreamInfo &rhs)
UInt_t fEvNum
Index of currently open file.
std::vector< FileInfo > fFiles
Coda data (file)
bool operator<(const StreamInfo &rhs) const
std::unique_ptr< Decoder::THaCodaData > fCodaData
const UInt_t * GetEvBuffer() const
bool operator==(const StreamInfo &rhs) const