#ifndef __HZIP__
#define __HZIP__

#include "TObject.h"
#include "TFile.h"
#include "TSystem.h"
#include "TString.h"
#include "TRegexp.h"
#include "TArchiveFile.h"
#include "TList.h"
#include "TObjString.h"
#include "TObjArray.h"
#include "TChain.h"
#include "TChainElement.h"

#include <wordexp.h>

#include <cstdio>
#include <fstream>
#include <iomanip>
#include <iostream>

using namespace std;

//_HADES_CLASS_DESCRIPTION
////////////////////////////////////////////////////////////////////////////
// HZip
//
// A helper class to read/work with zip files containing
// many root files. Those files can be typically produced
// by :
//            zip -j -n root myzipname myrootfiles
//
// Note: root files will not be compressed, directory names
//       ignored. It's a flat files structure. Purpose of the
//       zipping of many root files into one zip archive is
//       to improve the handling of many small files and reduce
//       the load on the file system.
//
// To make the daily work more easy a command line executable hzip
// is provided to produce and work with those zip files:
//
// usage: hzip -o zipfile [-i filefilter] [-f filelist] [-u outputdir] [-msth]
//    -f input ascii filelist (1 file per line)
//    -h help
//    -i input filefilter (like "be*.root")
//    -l list file in zip files
//    -m maxsize of file [bytes] (default = 2 Gbyte, will be split if larger)
//    -o outputzip file name (required)
//    -s save mode. do not override existing zip files (default is overwrite)
//    -t test. show what would be done
//    -u dir  unzip zip files to dir
//    -w print in which file membername is contained
// examples:
//    test zip root files      : hzip -t -o test.zip -i "/mydir/be*.root"
//    zip root files           : hzip -o test.zip -i "/mydir/be*.root"
//    zip root files from list : hzip -o test.zip -f filelist
//    unzip root files to dir  : hzip -i "test_*.zip" -u /mydir
//    list files in zip files  : hzip -i "test_*.zip" -l
//
// from the normal terminal.
// HZip provides the functionality to access, list and unzip files from
// a root macro.
// //-------------------------------------------------------------------------- // examples: // // TChain* chain = new TChain("myTree"); // HZip::makeChain("my.zip",chain); // add all root files to chain // HZip::makeChainGlob("my*.zip",chain); // add all root files of all matching zip files to chain // HZip::makeChainList("filelist.txt",chain); // add all root files of all zip files in filelist to chain // // chain->GetEntries(); // access all files and get number of entries // chain->ls(); // list all files in chain with number of entries // // Bool_t HZip::isInside("my.zip","my.root"); // is my.root contained in my.zip? // Int_t HZip::list("my.zip",".*"); // list all files which match the pattern // Int_t HZip::getList("my.zip",list,".*"); // return to TList list all files which match the pattern // Bool_t HZip::unzip("my.zip","mydir"); // unzip file to directory // Bool_t HZip::addFile("my.zip","my.root"); // add this root file to the zip file // Bool_t HZip::addFiles("my.zip",list); // add all root files from TList list to the zip file //////////////////////////////////////////////////////////////////////////// #define MAXZIPSIZE 2000000000 class HZip : public TObject { public: HZip(){}; virtual ~HZip(){}; static TObjArray* glob(TString pattern) { // Expand 'pattern' as it would be done by the shell (see sh(1)): // '*', '?', '~', '$ENVIRONMENT_VARIABLE' and '[', ']' are expanded. In case // the expansion is successful, a TObjArray of TObjStrings with the // names of all existing files is returned - otherwise NULL. // // The returned TObjArray must be deleted by the caller of the function! 
// // This example returns all existing .root files in /tmp as well as all .root // files in your home directory starting with the characters 'a', 'b', 'c' // and 'e': // // TObjArray* files = HTool::glob( "/tmp/*.root $HOME/[a-c,e]*.root" ); // if (files) // { // TObjString* name = NULL; // TIterator* file = files->MakeIterator(); // while ((name = (TObjString*)file->Next())) // { // Char_t* input = name->GetString.Data(); ... // } // delete files; // } wordexp_t file_list; Char_t** file; TObjArray* filenames = NULL; if (pattern.IsNull()) return NULL; if (::wordexp( pattern.Data(), &file_list, 0 )) { ::wordfree( &file_list ); return NULL; } file = file_list.we_wordv; filenames = new TObjArray; for (UInt_t i = 0; i < file_list.we_wordc; i++) { // check if files real exist, since ::wordexp(3) returns the unexpanded // pattern, if e.g. the path does not exist if (!gSystem->AccessPathName( file[i] )) filenames->Add( new TObjString( file[i] ) ); } ::wordfree( &file_list ); if (filenames->GetEntries() == 0) { delete filenames; filenames = NULL; } return filenames; } static TObjArray* readFileList(TString listfile) { // read inputfiles form list (one file per line) // and add them to an TObjArray. The pointer to // the array is returned. In case of an error or // if no files are contained the pointer is NULL. // The user must take care of deleting the array. 
if(gSystem->AccessPathName(listfile)){ printf("Error: readFile() : list file does not exist!"); return NULL; } Char_t line[1000]; ifstream inp; TObjArray* filenames = new TObjArray; inp.open(listfile.Data()); TString name; while(!inp.eof()){ inp.getline (line, 1000); name = line; if(gSystem->AccessPathName(name)){ printf("Error: readFileList() : file %s does not exist!",name.Data()); continue; } filenames->Add( new TObjString( name ) ); } inp.close(); if (filenames->GetEntries() == 0) { delete filenames; filenames = NULL; } return filenames; } static Bool_t chainToTObjArray(TChain* chain=0,TObjArray* filenames=0) { // add all filenames of TChain to TObjArray if(chain == 0){ printf("Error: chainToTObjArray() : TChain pointer is NULL!"); return kFALSE; } if(filenames == 0){ printf("Error: chainToTObjArray() : TObjArray pointer is NULL!"); return kFALSE; } TObjArray* elements = chain->GetListOfFiles(); Int_t nfiles = elements->GetEntries(); for(Int_t i=0;iAt(i); filenames->Add( new TObjString( element->GetTitle() ) ); } return kTRUE; } static Bool_t exists(TString name,Bool_t silent=kFALSE,Bool_t isZip=kTRUE) { // returns kTRUE if file exists and ends with .zip if(isZip && name.EndsWith(".zip") == 0) { if(!silent) printf("Error: File %s does not end with .zip\n!",name.Data()); return kFALSE; } if(gSystem->AccessPathName(name.Data()) != 0) { if(!silent) printf("Error: File %s does not exist\n!",name.Data()); return kFALSE; } return kTRUE; } static Bool_t splitName(TString fullname, TString& zipname,TString& membername) { // if filename is .zip#membername the // name is splitted into the zip file name and // the archive member name. 
return kTRUE // if success els kFALSE if(fullname.Contains(".zip#") != 0){ zipname = fullname; zipname.Remove(zipname.First('#')); membername = fullname; membername.Replace(0,membername.First('#') + 1,""); return kTRUE; } printf("Error: isInside() : Filename does not contain .zip#membername!\n"); return kFALSE; } static Bool_t isInside(TString name, Bool_t print=kFALSE) { // test if the member (format: zipfilename.zip#membername) // is inside the zip file. Return kTRUE if // success else kFALSE TString zipname; TString membername; if(!HZip::splitName(name,zipname,membername)) { return kFALSE; } if(!HZip::exists(zipname)) return kFALSE; Bool_t found = kFALSE; TFile* fzip = TFile::Open(zipname.Data()); if(fzip) { TArchiveFile* archive = fzip->GetArchive(); if(archive){ TObjArray* members = archive->GetMembers(); if(members->FindObject(membername.Data()) == 0) { if(print) printf("Info: isInside() : File %s not found ind zipfile %s\n",membername.Data(),zipname.Data()); found = kFALSE; } else { if(print) printf("Info: isInside() : File %s found ind zipfile %s\n" ,membername.Data(),zipname.Data()); found = kTRUE; } } else { printf("Error: isInside() : Retrieved NULL pointer for Archive!\n"); } fzip->Close(); delete fzip; } return found; } static Bool_t isInside(TString zipname, TString membername ,Bool_t print=kFALSE) { // test if the member is inside the zip file. // Return kTRUE if success else kFALSE TString name = zipname + "#" + membername; return isInside(name,print); } static Int_t list(TString name, TString filter=".*",Int_t size=0,Int_t time=0) { // lists the members of the zip file which pass // the name filter (see TRegexp). 
returns the number // of matched files if(!HZip::exists(name)) return 0; TFile* fzip = TFile::Open(name.Data()); if(fzip) { TArchiveFile* archive = fzip->GetArchive(); if(archive){ cout<<"-------------------------------------------------------------------------------"<GetMembers(); TString fname = ""; TRegexp expr(filter); Int_t ct = 0; for(Int_t i = 0; i < members->GetEntries(); i++){ TArchiveMember* member = (TArchiveMember*) members->At(i); fname = member->GetName(); if(fname(expr) != ""){ cout<GetCompressedSize() <<" mod time "<GetModTime().AsString() <<" "<GetName()<Close(); delete fzip; } return 0; } static Int_t getList(TString name,TList* list,TString filter=".*",Int_t size=0,Int_t time=0) { // fills a list with the members of the zip file which pass // the name filter (see TRegexp). returns the number // of matched files if(!HZip::exists(name)) return 0; TFile* fzip = TFile::Open(name.Data()); if(fzip) { TArchiveFile* archive = fzip->GetArchive(); if(archive){ TObjArray* members = archive->GetMembers(); TString fname = ""; TRegexp expr(filter); Int_t ct = 0; for(Int_t i = 0; i < members->GetEntries(); i++){ TArchiveMember* member = (TArchiveMember*) members->At(i); fname = member->GetName(); if(fname(expr) != ""){ list->Add(new TObjString(member->GetName())); ct++; } } return ct; } else { printf("Error: list() : Retrived NULL pointer for Archive!\n"); } fzip->Close(); delete fzip; } return 0; } static Bool_t makeChain(TString zipname,TChain* chain = 0,TString filter=".*",Int_t size=0,Int_t time=0) { // adds all root members of the zip file matching the filter // to TChain (has to be created before) if(chain == 0){ printf("Error: makeChain() : TChain pointer is NULL!"); return kFALSE; } TList* list = new TList(); HZip::getList(zipname,list,filter,size,time); HZip::list(zipname,filter,size,time); TObjString* member; TIterator* iter = list->MakeIterator(); while((member = (TObjString*)iter->Next()) != 0){ TString membername = member->GetString(); TString 
fullname = zipname + "#" + membername; chain->Add(fullname.Data()); } delete iter; delete list; return kTRUE; } static Bool_t makeChainGlob(TString expressionzip,TChain* chain = 0,TString filter=".*",Int_t size=0,Int_t time=0) { // adds all root members match the filter of all zip files matching the expression // to TChain (has to be created before) if(chain == 0){ printf("Error: makeChainGlob() : TChain pointer is NULL!"); return kFALSE; } TObjArray* files = HZip::glob(expressionzip); Int_t nfiles = files->GetEntries(); if(nfiles == 0) return kFALSE; for(Int_t j=0;jAt(j)))->GetString(); HZip::makeChain(fna,chain,filter,size,time); } delete files; return kTRUE; } static Bool_t makeChainList(TString listfile,TChain* chain = 0,TString filter=".*",Int_t size=0,Int_t time=0) { // adds all root members match the filter of all zip files matching the expression // to TChain (has to be created before). The file list should contain // one filename perline. if(chain == 0){ printf("Error: makeChainList() : TChain pointer is NULL!"); return kFALSE; } if(gSystem->AccessPathName(listfile)){ printf("Error: makeChainList() : list file does not exist!"); return kFALSE; } Char_t line[1000]; ifstream inp; inp.open(listfile.Data()); TString name; while(!inp.eof()){ inp.getline (line, 1000); name = line; if(gSystem->AccessPathName(name)){ printf("Error: makeChainList() : file %s does not exist!",name.Data()); continue; } HZip::makeChain(name,chain,filter,size,time); } inp.close(); return kTRUE; } static Bool_t addFile(TString zipname,TString membername, Int_t mode=0) { // mode = 0 ==> do not replace existing members and warn // mode = 1 ==> do not replace existing members and do not warn // mode = 2 ==> replace existing members and do not warn TString zipName = zipname; zipName.ReplaceAll(".zip",""); TString cmd = Form("zip -j -g -n .root %s %s 1>/dev/null",zipName.Data(),membername.Data()); if(!HZip::exists(membername,kFALSE,kFALSE)) return kFALSE; // complain if not existing 
if(!HZip::exists(zipname,kTRUE)) // do not complain if not existing { // create zipfile new Int_t rc = gSystem->Exec(cmd.Data()); if(rc == 0) { cout<<"adding: "<Exec(cmd.Data()); if(rc == 0) { if(!inside){ cout<<"adding: "<Exec(cmd.Data()); if(rc == 0) { cout<<"adding: "< do not replace existing members and warn // mode = 1 ==> do not replace existing members and do not warn // mode = 2 ==> replace existing members and do not warn cout<<"-------------------------------------------------------------------------------"<MakeIterator(); while((member = (TObjString*)iter->Next()) != 0){ TString membername = member->GetString(); if(!addFile(zipname,membername,mode)) { delete iter; cout<<"-------------------------------------------------------------------------------"<Exec(cmd.Data()); if(rc == 0) { return kTRUE; } else { printf("Error: test() : zip returned with error!"); return kFALSE; } } static Bool_t unzip(TString zipname,TString outDir=""){ // unzip content of zipfile to outDir (optional, default is local dir) TString cmd; if(outDir != "") cmd = Form("unzip %s -d %s",zipname.Data(),outDir.Data()); else cmd = Form("unzip %s" ,zipname.Data()); Int_t rc = gSystem->Exec(cmd.Data()); if(rc == 0) { return kTRUE; } else { printf("Error: unzip() : zip returned with error!"); return kFALSE; } } ClassDef(HZip,0); }; //ClassImp(HZip) #endif