/*
 * Written by Bastien Chevreux (BaCh)
 *
 * Copyright (C) 1997-2000 by the German Cancer Research Center (Deutsches
 *   Krebsforschungszentrum, DKFZ Heidelberg) and Bastien Chevreux
 * Copyright (C) 2000 and later by Bastien Chevreux
 *
 * All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the
 * Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
 *
 */

#include <getopt.h>

#include <boost/algorithm/string.hpp>
#include <boost/filesystem.hpp>

#include "modules/mod_bait.H"
#include "util/fmttext.H"

#include "version.H"


using namespace std;


// ---------------------------------------------------------------------
// Definitions of the static data members of MiraBait.
// Most of them are filled from the command line in mainMiraBait().
// ---------------------------------------------------------------------

// MIRA parameter sets (set up in mainMiraBait(), optionally refined via -a)
vector<MIRAParameters> MiraBait::MB_Pv;

// file type names as given on the command line: -f, -t and -F
string MiraBait::MB_fromtype;
string MiraBait::MB_totype;
string MiraBait::MB_baitfromtype;

// MB_totype translated by checkToType(); Read::AS_ILLEGAL if not forced
uint8 MiraBait::MB_tortype;

// files to screen (non-option arguments) and bait files (-b)
list<string> MiraBait::MB_infiles;
list<string> MiraBait::MB_baitfiles;
// output destinations (-o, -O) and prefix of generated file names (-N)
string MiraBait::MB_hitpath;
string MiraBait::MB_misspath;
string MiraBait::MB_nameprefix;

// precomputed hash statistics file to load instead of bait files (-L)
string MiraBait::MB_hashstatfname;

bool   MiraBait::MB_deletestaronlycolumns = false;  // set by -d
bool   MiraBait::MB_mustdeletetargetfiles = true;
bool   MiraBait::MB_wantbaithits          = true;   // write matching sequences
bool   MiraBait::MB_wantbaitmiss          = false;  // write non-matching sequences
bool   MiraBait::MB_fwdandrev             = true;   // cleared by -r
bool   MiraBait::MB_changeseqcase         = true;   // cleared by -c
bool   MiraBait::MB_mergeoutput           = false;  // -o/-O target is a single file
int32  MiraBait::MB_numbaithits           = 1;      // set by -n

list<char> MiraBait::MB_filepairinfo;    // p = 2 files, P = 1 file interleave
uint32 MiraBait::MB_basesperhash      = 0;  // k-mer size (-k), 0 = not set
uint64 MiraBait::MB_baitpoolsize      = 0;  // number of bait sequences loaded
uint64 MiraBait::MB_numreadsread      = 0;
uint64 MiraBait::MB_numpairsbaited    = 0;
uint64 MiraBait::MB_numpairsmissed    = 0;
uint64 MiraBait::MB_numunpairedbaited = 0;
uint64 MiraBait::MB_numunpairedmissed = 0;

// number of reads which have clips already when loaded (CAF/MAF)
uint64 MiraBait::MB_numclippedreadsinload = 0;

// work units processed by doBaitWithHS() and the currently open files
list<MiraBait::wqueueunit_t> MiraBait::MB_workqueue;
MiraBait::files_t MiraBait::MB_files;

// Maps a (lower case) input file type name to the corresponding
// Read::AS_* type code. Queried by checkFromType().
unordered_map<string,uint8> MiraBait::MB_fromtypemap = {
  // sequence formats
  { "fastq", Read::AS_FASTQ  },
  { "fasta", Read::AS_FASTA  },
  // the different extensions in use for GenBank files
  { "gb",    Read::AS_GBF    },
  { "gbf",   Read::AS_GBF    },
  { "gbk",   Read::AS_GBF    },
  { "gbff",  Read::AS_GBF    },
  // assembly / trace related formats
  { "caf",   Read::AS_CAF    },
  { "maf",   Read::AS_MAF    },
  { "exp",   Read::AS_GAP4DA },
  { "phd",   Read::AS_PHD    },
};

// Maps a (lower case) output file type name to the corresponding
// Read::AS_* type code. Queried by checkToType().
unordered_map<string,uint8> MiraBait::MB_totypemap = {
  { "fastq", Read::AS_FASTQ    },
  { "fasta", Read::AS_FASTA    },
  { "caf",   Read::AS_CAF      },
  { "scaf",  Read::AS_CAF      },
  { "maf",   Read::AS_MAF      },
  { "txt",   Read::AS_READNAME },   // read names only
};


// Nothing to release here: the destructor has no work to do.
MiraBait::~MiraBait() = default;

// Prints the command line help of mirabait to stdout.
//
// The options described here must be kept in sync with the option string
// handed to getopt_long() in mainMiraBait():
//   hcdiIpPrva:b:f:F:k:L:n:N:o:O:t:
void MiraBait::usage()
{
  cout << "mirabait\t(MIRALIB version " << MIRALIBVERSION << ")\n";
  cout << "Author: Bastien Chevreux\t(bach@chevreux.org)\n\n";

  cout << FmtText::wordWrap("mirabait selects reads from a read collection which are partly similar or equal to sequences defined as target baits. Similarity is defined by finding a user-adjustable number of common k-mers (sequences of k consecutive bases) which are the same in the bait sequences and the screened sequences to be selected, either in forward or forward/reverse complement direction.\nWhen used on paired files, selects sequences where at least one mate matches.\n");
  cout << "\nUsage:\n";
  cout << "mirabait [options]"
    "\n\t\t{-b baitfile [-b ...] | -L file}"
    "\n\t\t{-p file_1 file_2 | -P file3}* [file4 ...]\n\n";
  cout << "Main options:\n";
  cout << "\t-b file\t\tLoad bait sequences from file\n"
    "\t\t\t (multiple -b allowed)\n"
    "\t-p file1 file2\tLoad paired sequences to search from file1 and file2\n"
    "\t\t\t Files must contain same number of sequences, sequence \n"
    "\t\t\t names must be in same order.\n"
    "\t\t\t Multiple -p allowed, but must come before non-paired\n"
    "\t\t\t files.\n"
    "\t-P file\t\tLoad paired sequences from file\n"
    "\t\t\t File must be interleaved: pairs must follow each other,\n"
    "\t\t\t non-pairs are not allowed.\n"
    // fixed: this paragraph describes -P, not -p
    "\t\t\t Multiple -P allowed, but must come before non-paired\n"
    "\t\t\t files.\n";

  cout << "\n"
    "\t-L file\t\tDo not compute hash statistics, load from file\n"
    // fixed typo: "which the must be"
    "\t\t\t (which then must be a mirabait hash statistics file).\n";

  cout << "\n"
    "\t-k\t\tk-mer, length of bait in bases (<=256, default=31)\n"
    "\t-n\t\tIf >0: minimum number of k-mer baits needed (default=1)\n"
    "\t\t\tIf <=0: allowed number of missed kmers over sequence\n"
    "\t\t\t        length\n"
    "\t-i\t\tSelects sequences that do not hit bait\n"
    "\t-I\t\tSelects sequences that hit and do not hit bait (to\n"
    "\t\t\t different files)\n"
    "\t-r\t\tNo checking of reverse complement direction\n"
    "\t-c\t\tNo case change of sequence to denote bait hits\n";

  cout << "\nOptions for defining files types to load and save:\n";
  cout << FmtText::wordWrap("Normally, mirabait recognises the file types according to the file extension. In cases you need to force a certain file type because the file extension is non-standard, use the following options:\n");
  cout << "\t-f <fromtype>\tload sequences from this type of files, where type is:\n"
    "\t   caf\t\t sequences from CAF\n"
    "\t   fasta\t sequences from a FASTA\n"
    "\t   fastq\t sequences from a FASTQ\n"
    "\t   gb[f|k|ff]\t sequences from GenBank\n"
    "\t   maf\t\t sequences from MAF\n"
    "\t   phd\t\t sequences from a PHD\n"
    ;
  cout << "\t-F <fromtype>\tload bait sequences from this type of file\n"
    "\t\t\t For supported file types, see -f\n";
  cout << "\t-t <totype>\twrite the sequences to this type;\n"
    "\t   caf\t\t sequences to CAF\n"
    "\t   fasta\t sequences to FASTA (saved wo quality values)\n"
    "\t   fastq\t sequences to FASTQ\n"
    "\t   maf\t\t sequences to MAF\n"
    "\t   txt\t\t sequence names to text file\n";

  cout << "\nOptions for output definition:\n";
  cout << FmtText::wordWrap("Normally, mirabait writes separate result files (named 'bait_match_*' and 'bait_miss_*') for each input to the current directory. For changing this behaviour, use these options:\n");
  cout << "\t-N name\t\tChange the prefix 'bait' to <name>\n"
    "\t\t\t Has no effect if -o/-O is used and targets are not\n"
    "\t\t\t directories\n";
  cout << "\t-o <path>\tSave sequences matching bait to path\n"
    "\t\t\t If path is a directory, write separate files into this\n"
    "\t\t\t directory. If not, combine all matching sequences from\n"
    "\t\t\t the input file(s) into a single file specified by the\n"
    "\t\t\t path.\n";
  cout << "\t-O <path>\tLike -o, but for sequences not matching\n";

//  cout << "\n"
//    "\t-o\t\tfastq quality Offset (only for -f = 'fastq')\n"
//    "\t\t\t Offset of quality values in FASTQ file. Default: 33\n"
//    "\t\t\t A value of 0 tries to automatically recognise.\n";



//  cout << "\t-a <string>\tString with MIRA parameters to be parsed\n"
//    "\t\t\t Useful when setting parameters affecting consensus\n"
//    "\t\t\t calling like -CO:mrpg etc.\n"
//    "\t\t\t E.g.: -a \"454_SETTINGS -CO:mrpg=3\"\n";

  cout << "\nExamples:\n"
    "  mirabait -b b.fasta file.fastq"
    "\n  mirabait -b b1.fasta -b b2.gbk file.fastq"
    "\n  mirabait -b b.fasta -p file_1.fastq file_2.fastq"
    "\n  mirabait -b b.fasta -p file_1.fastq file_2.fastq -P file3.fasta file4.caf"
    "\n  mirabait -I -b b.fasta -p file_1.fastq file_2.fastq -P file3.fasta file4.caf"
    "\n  mirabait -k 27 -n 10 -b b.fasta file.fastq"
    "\n  mirabait -F fasta -f fastq -b b.dat file.dat"
    "\n  mirabait -o /dev/shm/ -b b.fasta -p file_1.fastq file_2.fastq"
    "\n  mirabait -o /dev/shm/match -b b.fasta -p file_1.fastq file_2.fastq"
    "\n";
}


// Translates an input file type name into its Read::AS_* code.
// The name is looked up as-is in MB_fromtypemap (no case conversion here);
// unknown names yield Read::AS_ILLEGAL.
uint8 MiraBait::checkFromType(string & fromtype)
{
  const auto entry=MB_fromtypemap.find(fromtype);
  if(entry==MB_fromtypemap.end()){
    return Read::AS_ILLEGAL;
  }
  return entry->second;
}

// Translates an output file type name into its Read::AS_* code.
// The name is looked up as-is in MB_totypemap (no case conversion here);
// unknown names yield Read::AS_ILLEGAL.
uint8 MiraBait::checkToType(string & totype)
{
  const auto entry=MB_totypemap.find(totype);
  if(entry==MB_totypemap.end()){
    return Read::AS_ILLEGAL;
  }
  return entry->second;
}



// Registers 'filename' as next file to be loaded via 'rpio'.
//
// The file type is the one forced via -f (MB_fromtype) or, if none was
// given, the one guessed from the file name. 'ziptype' is reset to 0 and
// only set by the guessing step.
// Prints the first part of the "Loading data from ..." progress line (no
// line end, setupOutfiles() completes it).
//
// Returns the Read::AS_* code of the file type as found by checkFromType().
uint8 MiraBait::setupRPIO(const string & filename, ReadGroupLib::ReadGroupID rgid, ReadPoolIO & rpio, uint8 & ziptype)
{
  ziptype=0;

  string filetype(MB_fromtype);
  if(filetype.empty()){
    // nothing forced by the user: derive type from the file name
    string unusedpath;
    string unusedstem;
    guessFileAndZipType(filename,unusedpath,unusedstem,filetype,ziptype);
  }
  boost::to_lower(filetype);

  const uint8 rtype=checkFromType(filetype);

  cout << "Loading data from " << filename << " (" << filetype << ")";

  // FASTA gets a companion quality file name, but is registered with the
  // "fastanoqual" loader type
  string qualfilename;
  string loadtype(filetype);
  if(filetype=="fasta"){
    qualfilename=filename+".qual";
    loadtype="fastanoqual";
  }

  rpio.registerFile(loadtype, filename, qualfilename, rgid, false);
  return rtype;
}

void MiraBait::setupOutfiles(const string & fname, uint8 rtype, uint8 ziptype, ofstream & hitfout, ofstream & missfout)
{
  if(hitfout.is_open()) hitfout.close();
  if(missfout.is_open()) missfout.close();

  string pname(MB_nameprefix);
  if(pname.empty()) pname="bait";
  string matchpre(pname+"_match_");
  string misspre(pname+"_miss_");

  boost::filesystem::path fp(fname);
  // if we work on zipped files, the ouput must get away the zip extension
  if(ziptype){
    fp=fp.parent_path() / fp.stem();  // boost::filesystem append operation
  }

  string hitname;
  if(MB_mergeoutput && !MB_hitpath.empty()){
    hitname=MB_hitpath;
  }else{
    hitname=MB_hitpath;
    if(!hitname.empty()) hitname+='/';
    hitname+=matchpre+fp.filename().string();
  }
  string missname;
  if(MB_mergeoutput && !MB_misspath.empty()){
    missname=MB_misspath;
  }else{
    missname=MB_misspath;
    if(!missname.empty()) missname+='/';
    missname+=misspre+fp.filename().string();
  }

  auto omode=ios::out;
  if(MB_mergeoutput) omode|=ios::app;
  if(MB_wantbaithits){
    hitfout.open(hitname,omode);
    if(!hitfout.is_open()){
      cout.flush();
      cerr << "\n\nCould not open " << hitname << ".\nDoes the path exist, is it writable? Is the disk full?";
      exit(10);
    }
  }
  if(MB_wantbaitmiss){
    missfout.open(missname,omode);
    if(!missfout.is_open()){
      cout.flush();
      cerr << "\n\nCould not open " << missname << ".\nDoes the path exist, is it writable? Is the disk full?";
      exit(10);
    }
  }

  if(MB_wantbaithits && MB_wantbaitmiss){
    cout << ", sorting" << endl;
  }else{
    cout << ", filtering" << endl;
  }
  if(MB_wantbaithits){
    cout << "+++ matches to " << hitname << endl;
  }
  if(MB_wantbaitmiss){
    cout << "--- non-matches to " << missname << endl;
  }
}


// Decides for every read of 'rp' whether it is baited.
//
// On return, 'take' has one entry per read: 1 if the number of bait hits
// reported by hs.checkBaitHit() reaches the needed number, 0 otherwise.
// Needed number of hits:
//  - MB_numbaithits > 0: exactly that value
//  - MB_numbaithits <= 0: clipped length - (k-1) + MB_numbaithits, but
//    never below 0 (k being MB_basesperhash)
// Reads whose clipped length differs from their full length are counted in
// MB_numclippedreadsinload.
template<typename TVHASH_T>
void MiraBait::baitReads(HashStatistics<TVHASH_T> & hs, const ReadPool & rp, vector<uint8> & take)
{
  take.assign(rp.size(),0);

  for(uint32 idx=0; idx<take.size(); ++idx){
    auto & actread=rp[idx];

    if(actread.getLenClippedSeq() != actread.getLenSeq()){
      ++MB_numclippedreadsinload;
    }

    uint32 needed=static_cast<uint32>(MB_numbaithits);
    if(MB_numbaithits<=0){
      int32 fromlength=
        static_cast<int32>(actread.getLenClippedSeq())
        -static_cast<int32>(MB_basesperhash-1)
        +MB_numbaithits;
      if(fromlength<0) fromlength=0;
      needed=static_cast<uint32>(fromlength);
    }

    if(hs.checkBaitHit(actread,MB_changeseqcase) >= needed){
      take[idx]=1;
    }
  }
}

// Writes the reads of one work unit to the hit / miss output streams in
// MB_files and updates the global statistics counters.
//
// The decision for read i is taken from wqu.take1[i]. For input from two
// paired files (PS_2FILES), read i of the second pool follows the same
// decision and is written directly after read i of the first pool: into the
// same stream when output is merged, else into the second stream.
void MiraBait::saveWQueueElement(wqueueunit_t & wqu)
{
  FUNCSTART("void MiraBait::saveWQueueElement(wqueueunit_t & wqu)");

  Read::setCoutType(MB_files.writetype);

  const bool twofiles=(wqu.pairstatus==PS_2FILES);

  uint64 hitswritten=0;
  uint64 misseswritten=0;

  for(uint32 idx=0; idx<wqu.rp1.size(); ++idx){
    const bool baited=(wqu.take1[idx]!=0);

    if(baited && MB_wantbaithits){
      MB_files.hitfout1 << wqu.rp1[idx];
      ++hitswritten;
      if(twofiles){
        (MB_mergeoutput ? MB_files.hitfout1 : MB_files.hitfout2) << wqu.rp2[idx];
      }
    }else if(!baited && MB_wantbaitmiss){
      MB_files.missfout1 << wqu.rp1[idx];
      ++misseswritten;
      if(twofiles){
        (MB_mergeoutput ? MB_files.missfout1 : MB_files.missfout2) << wqu.rp2[idx];
      }
    }
  }

  MB_numreadsread+=wqu.rp1.size()+wqu.rp2.size();

  // the counters above only saw reads of the first pool: for interleaved
  // input this means two reads per pair, for two files one read per pair
  if(wqu.pairstatus==PS_NOPAIR){
    MB_numunpairedbaited+=hitswritten;
    MB_numunpairedmissed+=misseswritten;
  }else if(wqu.pairstatus==PS_INTERLEAVE){
    MB_numpairsbaited+=hitswritten/2;
    MB_numpairsmissed+=misseswritten/2;
  }else if(wqu.pairstatus==PS_2FILES){
    MB_numpairsbaited+=hitswritten;
    MB_numpairsmissed+=misseswritten;
  }else{
    MIRANOTIFY(Notify::INTERNAL,"Ummm ... unknown pairstatus " << static_cast<uint16>(wqu.pairstatus));
  }

//  MB_numreadsread+=rp.size();
//  MB_numreadswritten+=rp.size();
}


// Performs the complete baiting run with the hash statistics object 'mbhs'.
//
// Step 1: fill 'mbhs', either by loading the hash statistics file given via
//   -L (MB_hashstatfname) or by loading all bait files (MB_baitfiles) and
//   computing the statistics from them.
// Step 2: walk through all input files (MB_infiles) in chunks of 500 reads,
//   bait the reads and write them out via saveWQueueElement().
//   MB_filepairinfo is consumed front to back and tells whether the next
//   input is interleaved ('P'), spread over two files ('p') or, once it is
//   empty, unpaired.
//
// For paired input, a pair is taken as soon as one of its reads is baited.
// Inconsistent paired input (files of different length, unsynchronised
// names/templates, odd number of reads in an interleaved file) raises a
// fatal Notify via MIRANOTIFY. A k-mer size mismatch between -k and the
// loaded hash statistics terminates the process with exit code 10.
template<typename TVHASH_T>
void MiraBait::doBaitWithHS(HashStatistics<TVHASH_T> & mbhs)
{
  FUNCSTART("void MiraBait::doBaitWithHS(HashStatistics<TVHASH_T> & mbhs)");
  {
    if(!MB_hashstatfname.empty()){
      cout << "Loading from existing hashstat file ... "; cout.flush();
      mbhs.loadHashStatistics(MB_hashstatfname);
      cout << "done.\n";
      if(MB_basesperhash!=0){
        if(mbhs.getBasesPerHash()!=MB_basesperhash){
          cout << "Error: kmer size set for mirabait (" << MB_basesperhash
               << ") is not equal to the kmer size loaded from file ("
               << mbhs.getBasesPerHash() << ")!\nDid you know you can leave away -k when using -L?\nAborting!\n";
          exit(10);
        }
      }else{
        cout << "No -k given, using k from the loaded file: "
             << mbhs.getBasesPerHash() << endl;
        MB_basesperhash=mbhs.getBasesPerHash();
      }
    }else{
      ReadGroupLib::ReadGroupID rgid=ReadGroupLib::newReadGroup();
      rgid.setSequencingType(ReadGroupLib::SEQTYPE_TEXT);

      cout << "Loading baits ...";
      ReadPool baitrp;
      ReadPoolIO rpio(baitrp);
      rpio.setAttributeFASTAQualFileWanted(false); // in case we load FASTAs
      for(auto & bfn : MB_baitfiles){
        uint8 ziptype=0;
        string ft;
        string dummyfromstem;
        string dummypathto;
        guessFileAndZipType(bfn,dummypathto,dummyfromstem,ft,ziptype);
        // type not recognisable from the file name: use the type given
        //  via -F or fall back to FASTA
        if(checkFromType(ft)==Read::AS_ILLEGAL){
          if(MB_baitfromtype.empty()){
            ft="fasta";
          }else{
            ft=MB_baitfromtype;
          }
        }

        rpio.registerFile(ft,bfn,"",rgid,false);
        rpio.loadNextSeqs(-1,-1);
      }

      cout << "baitrp.size(): " << baitrp.size() << endl;

      string resultfn("hashstat.mhs.gz");
      mbhs.prepareHashStatistics(baitrp,false,false,MB_fwdandrev,1,0,MB_basesperhash,
                                 MB_Pv[0].getHashStatisticsParams().hs_million_hashes_per_buffer,
                                 resultfn,".");
      MB_baitpoolsize=baitrp.size();
      if(mbhs.getNumHashEntries()==0){
        cout << FmtText::makeTextSign("WARNING: not a single kmer bait could be generated. This is due to the sequences you are using to bait are all either too short or contain too many closely located IUPAC codes.\nThis may be right, but most probably is not. If not: either check your bait sequences in the input files or choose a lower kmer size.") << endl;
      }
    }
  }

  mbhs.showHashStatisticsInfo();

  MB_workqueue.resize(1);
  auto qI=MB_workqueue.begin(); // fixed atm

  ReadPoolIO rpio1(qI->rp1);
  ReadPoolIO rpio2(qI->rp2);
  rpio1.setAttributeFASTAQualFileWanted(false); // in case we load FASTAs
  rpio2.setAttributeFASTAQualFileWanted(false); // in case we load FASTAs
  rpio1.setAttributeFASTQQualOffset(33); // in case we load FASTQs
  rpio2.setAttributeFASTQQualOffset(33); // in case we load FASTQs
  ReadGroupLib::ReadGroupID rgid=ReadGroupLib::newReadGroup();
  rgid.setSequencingType(ReadGroupLib::SEQTYPE_SOLEXA);

  auto ifI=MB_infiles.begin();
  while(ifI!=MB_infiles.end()){
    uint8 ziptype=0;
    rpio1.setAttributeProgressIndicator(true);
    MB_files.infilename1.clear();
    MB_files.infilename2.clear();

    MB_files.intype1=setupRPIO(*ifI,rgid,rpio1,ziptype);

    // output type: as input, except if forced via -t. For merged output,
    //  the type of the very first input file decides for all files.
    MB_files.writetype=MB_files.intype1;
    if(MB_mergeoutput){
      if(ifI==MB_infiles.begin() && MB_tortype==Read::AS_ILLEGAL){
        MB_tortype=MB_files.intype1;
      }
    }
    if(MB_tortype!=Read::AS_ILLEGAL){
      MB_files.writetype=MB_tortype;
    }
    setupOutfiles(*ifI,MB_files.intype1,ziptype,MB_files.hitfout1,MB_files.missfout1);
    MB_files.infilename1=*ifI;

    qI->pairstatus=PS_NOPAIR;
    if(!MB_filepairinfo.empty()){
      if(MB_filepairinfo.front()=='P'){
        qI->pairstatus=PS_INTERLEAVE;
      }else if(MB_filepairinfo.front()=='p'){
        // second file of the pair is the next one on the command line
        ++ifI;
        if(ifI==MB_infiles.end()){
          MIRANOTIFY(Notify::FATAL,"Something's wrong here: -p says to expect one further file, but " << *(--ifI) << " is the last file seen on the command line?");
        }
        rpio1.setAttributeProgressIndicator(false);
        rpio2.setAttributeProgressIndicator(true);
        MB_files.intype2=setupRPIO(*ifI,rgid,rpio2,ziptype);
        setupOutfiles(*ifI,MB_files.intype2,ziptype,MB_files.hitfout2,MB_files.missfout2);
        MB_files.infilename2=*ifI;
        qI->pairstatus=PS_2FILES;
      }
      MB_filepairinfo.pop_front();
    }

    ++ifI;

    while(rpio1.loadNextSeqs(500)){
      if(qI->pairstatus==PS_2FILES){
        rpio2.loadNextSeqs(500);
        if(qI->rp1.size() != qI->rp2.size()){
          MIRANOTIFY(Notify::FATAL,"Something's wrong here: -p says you have two files with reads paired across both files. But file " << MB_files.infilename1 << " does not have the same number of reads as file " << MB_files.infilename2 << " ???");
        }
      }

      // name or template checks
      {
        if(qI->pairstatus==PS_2FILES){
          for(uint32 rpi=0; rpi<qI->rp1.size(); ++rpi){
            if(qI->rp1[rpi].getName()!=qI->rp2[rpi].getName()
               && qI->rp1[rpi].getTemplate()!=qI->rp2[rpi].getTemplate()){
              MIRANOTIFY(Notify::FATAL,"Paired end files not synchronised: read name " << qI->rp1[rpi].getName() << " not equal to " << qI->rp2[rpi].getName() << " and templates also do not match: " << qI->rp1[rpi].getTemplate() << " vs " << qI->rp2[rpi].getTemplate());
            }
          }
        }else if(qI->pairstatus==PS_INTERLEAVE){
          // The loops below walk the reads in steps of two. An odd number
          //  of reads would make them access elements past the end, so
          //  catch that case here with a proper error message.
          if(qI->rp1.size()%2 != 0){
            MIRANOTIFY(Notify::FATAL,"Interleaved paired end file " << MB_files.infilename1 << " apparently not cleanly interleaved: it does not contain an even number of reads.");
          }
          for(uint32 rpi=0; rpi<qI->rp1.size(); rpi+=2){
            if(qI->rp1[rpi].getTemplate()!=qI->rp1[rpi+1].getTemplate()){
              MIRANOTIFY(Notify::FATAL,"Interleaved paired end file apparently not cleanly interleaved: read template " << qI->rp1[rpi].getTemplate() << " not equal to " << qI->rp1[rpi+1].getTemplate());
            }
          }
        }
      }

      baitReads(mbhs,qI->rp1,qI->take1);
      if(qI->pairstatus==PS_2FILES){
        baitReads(mbhs,qI->rp2,qI->take2);

        // take both reads in dual load
        auto t1I=qI->take1.begin();
        auto t2I=qI->take2.begin();
        for(; t1I != qI->take1.end(); ++t1I, ++t2I){
          if(*t2I) *t1I=1;
          if(*t1I) *t2I=1;
        }
      }

      if(qI->pairstatus==PS_INTERLEAVE){
        // take both reads in interleaved
        //  (size of take1 is even, this was checked above)
        auto sI=qI->take1.begin();
        auto eI=sI+1;
        for(; sI!=qI->take1.end(); sI+=2, eI+=2){
          if(*sI | *eI){
            *sI=1;
            *eI=1;
          }
        }
      }

      saveWQueueElement(*qI);

      Read::trashReadNameContainer();
      qI->rp1.discard();
      qI->rp2.discard();
    }
    if(qI->pairstatus==PS_2FILES){
      // first file is exhausted, the second one must be, too
      rpio2.loadNextSeqs(1);
      if(qI->rp2.size()){
        MIRANOTIFY(Notify::FATAL,"Something's wrong here: -p says you have two files with reads paired across both files. But file " << MB_files.infilename2 << " has more reads than file " << MB_files.infilename1 << " ???");
      }
    }
  }

  cout << endl;
}


// Entry point of the mirabait program.
//
// Parses the command line (see usage()), checks file types and output
// destinations, sets up the MIRA parameters, selects the hash width needed
// for the k-mer size and runs the baiting via doBaitWithHS(). Finally
// prints some statistics on the run.
//
// Returns 0 when the run completed. Errors in the command line or in the
// setup terminate the process via exit(): code 1 for command line problems,
// code 10 for k-mer size and file system problems.
int MiraBait::mainMiraBait(int argc, char ** argv)
{
  //CALLGRIND_STOP_INSTRUMENTATION;

  FUNCSTART("int mainMiraBait(int argc, char ** argv)");

  extern char *optarg;
  extern int optind;


  string path;
  string convertprog;
  splitFullPathAndFileName(argv[0],path,convertprog);

  string miraparams;

  MB_basesperhash=0;
  MB_baitpoolsize=0;

  MB_filepairinfo.clear();    // p = 2 files, P = 1 file interleave

  while (true){
    static struct option long_options[] =
      {
        {"help",  no_argument,           0, 'h'},
        {"version", no_argument,         0, 'v'},
        // maps to -L which needs a file name: must be required_argument,
        //  else optarg is a null pointer when the long form is used
        {"loadhsf", required_argument,   0, 'L'},
        {0, 0, 0, 0}
      };
    /* getopt_long stores the option index here. */
    int option_index = 0;

    int c = getopt_long (argc, argv, "hcdiIpPrva:b:f:F:k:L:n:N:o:O:t:",
                         long_options, &option_index);
    if(c == -1) break;

    switch (c) {
    case 'a': {
      miraparams=optarg;
      break;
    }
    case 'b': {
      MB_baitfiles.push_back(optarg);
      break;
    }
    case 'c': {
      MB_changeseqcase=false;
      break;
    }
    case 'd': {
      MB_deletestaronlycolumns=true;
      break;
    }
    case 'f': {
      MB_fromtype=optarg;
      break;
    }
    case 'F': {
      MB_baitfromtype=optarg;
      break;
    }
    case 'i': {
      MB_wantbaithits=false;
      MB_wantbaitmiss=true;
      break;
    }
    case 'I': {
      MB_wantbaithits=true;
      MB_wantbaitmiss=true;
      break;
    }
    case 'k': {
      uint32 bla=atoi(optarg);
      MB_basesperhash=bla;
      break;
    }
    case 'L': {
      MB_hashstatfname=optarg;
      break;
    }
    case 'n': {
      MB_numbaithits=atoi(optarg);
      break;
    }
    case 'N': {
      MB_nameprefix=optarg;
      break;
    }
    case 'o': {
      MB_wantbaithits=true;
      MB_hitpath=optarg;
      break;
    }
    case 'O': {
      MB_wantbaitmiss=true;
      MB_misspath=optarg;
      break;
    }
    case 'p' :
    case 'P' : {
      MB_filepairinfo.push_back(c);
      break;
    }
    case 'r': {
      MB_fwdandrev=false;
      break;
    }
    case 't': {
      MB_totype=optarg;
      break;
    }
    case 'h':
    case '?': {
      usage();
      exit(0);
    }
    case 'v':
      cout << MIRAVERSION << endl;
      exit(0);
    default : {}
    }
  }

  if(MB_baitfiles.empty() && MB_hashstatfname.empty()){
    usage();
    cout << endl;
    cerr << argv[0] << ": " << "No bait files defined via -b and no -L given!\nDid you use the command line for the old mirabait (<= 4.0.2)?\n";
    exit(1);
  }

  if(argc-optind < 1) {
    usage();
    cout << endl;
    cerr << argv[0] << ": " << "Missing files to work on!\n";
    exit(1);
  }

  for(;optind<argc;++optind){
    MB_infiles.push_back(argv[optind]);
  }

  if(!MB_fromtype.empty() && !checkFromType(MB_fromtype)){
    usage();
    cout << endl;
    cerr << "Unknown or illegal file type '" << MB_fromtype << "' defined as <fromtype>\n";
    exit(1);
  }
  if(MB_fromtype.empty()){
    // no type forced: every recognisable extension must be a known type
    for(auto & fname : MB_infiles){
      uint8 ziptype=0;
      string ft;
      string dummyfromstem;
      string dummypathto;
      guessFileAndZipType(fname,dummypathto,dummyfromstem,ft,ziptype);
      if(!ft.empty() && checkFromType(ft)==0){
        usage();
        cout << endl;
        cerr << "Unknown or illegal file extension '" << ft << "' in file name " << fname << "\n";
        exit(1);
      }
    }
  }

  MB_tortype=Read::AS_ILLEGAL;
  if(!MB_totype.empty()){
    MB_tortype=checkToType(MB_totype);
    if(MB_tortype==Read::AS_ILLEGAL){
      cerr << "Unknown or illegal format '" << MB_totype << "' defined as to-type\n";
      exit(1);
    }
  }

  // Prepares one output destination given via -o / -O.
  //  - existing directory: makes sure the path ends with '/' and returns
  //    false (separate output files go into that directory)
  //  - anything else: path denotes a single file collecting all output. A
  //    file left over from a previous run is removed, process exits with
  //    code 10 if that fails. Returns true (merged output).
  // Checking for a failed removal only makes sense in the second case: an
  //  existing directory is expected to still exist.
  auto prepareOutputPath=[](string & outpath) -> bool {
    boost::filesystem::path finaldest(walkSymLinks(outpath));
    if(boost::filesystem::exists(finaldest)
       && boost::filesystem::is_directory(finaldest)){
      if(outpath.back()!='/') outpath+='/';
      return false;
    }
    fileRemove(finaldest.string(),false);
    if(boost::filesystem::exists(finaldest)){
      cout.flush();
      cerr << "\n\nCould not remove file " << finaldest << "\nIs it writable?";
      exit(10);
    }
    return true;
  };

  bool hitmerge=false;
  bool missmerge=false;
  if(!MB_hitpath.empty()){
    hitmerge=prepareOutputPath(MB_hitpath);
    MB_mergeoutput=hitmerge;
  }
  if(!MB_misspath.empty()){
    missmerge=prepareOutputPath(MB_misspath);
    MB_mergeoutput=missmerge;
  }
  // There is only one merge flag for both outputs. Mixing a directory with
  //  a single file can therefore not work when both outputs are written:
  //  say so now instead of failing later while opening the output files.
  if(MB_wantbaithits && MB_wantbaitmiss
     && !MB_hitpath.empty() && !MB_misspath.empty()
     && hitmerge!=missmerge){
    cerr << argv[0] << ": " << "Paths given via -o and -O must either both be existing directories or both be files.\n";
    exit(1);
  }

  MIRAParameters::setupStdMIRAParameters(MB_Pv);
  if(!miraparams.empty()){
    cout << "Parsing special MIRA parameters: " << miraparams << endl;
    MIRAParameters::parse(miraparams,MB_Pv,false);
    cout << "Ok.\n";
  }

  if(MB_numbaithits>0){
    cout << "Baiting sequences with at least " << MB_numbaithits << " exact kmer matches.\n";
  }else{
    cout << "Baiting sequences allowing for " << -MB_numbaithits << " missed kmer matches over the sequence length.\n";
  }

  try{
    // find out which size of hash we are going to work with
    uint32 sizeofhash=0;
    if(MB_hashstatfname.empty()){
      if(MB_basesperhash==0) MB_basesperhash=31;
      sizeofhash=HashStatistics<vhash64_t>::byteSizeOfHash(MB_basesperhash);
    }else{
      auto mhs=HashStatistics<vhash64_t>::loadHashStatisticsFileHeader(MB_hashstatfname);
      sizeofhash=mhs.sizeofhash;
      if(MB_basesperhash>0 && MB_basesperhash != mhs.basesperhash){
        MIRANOTIFY(Notify::FATAL,"-k specified on the command line (" << MB_basesperhash << ") is different than the kmer size saved (" << mhs.basesperhash << ") in the hash statistics file. This is treated as error, bailing out.");
      }
      MB_basesperhash=mhs.basesperhash;
      cout << "Size of kmers in file " << MB_hashstatfname << ": " << MB_basesperhash << endl;
    }

    if(MB_basesperhash>256){
      cout << "Sorry, the max. kmer size supported atm is 256.\n";
      exit(10);
    }

    if(sizeofhash==8){
      HashStatistics<vhash64_t> hs;
      doBaitWithHS(hs);
    }else if(sizeofhash==16){
      HashStatistics<vhash128_t> hs;
      doBaitWithHS(hs);
    }else if(sizeofhash==32){
      HashStatistics<vhash256_t> hs;
      doBaitWithHS(hs);
    }else if(sizeofhash==64){
      HashStatistics<vhash512_t> hs;
      doBaitWithHS(hs);
    }else{
      BUGIFTHROW(true,"sizeofhash == " << sizeofhash << " is rather unexpected.");
    }

  }
  catch(Notify & n){
    n.handleError("main");
  }
  catch(const Flow &){
    cerr << "Unexpected exception: Flow()\n";
  }
  catch(...){
    cerr << "Unknown exception caught, aborting the process.\n\nPlease contact: bach@chevreux.org\n\n";
    abort();
  }

  if(MB_numclippedreadsinload){
    cout << FmtText::wordWrap("\nNOTICE! You baited sequences which had clipping information (CAF or MAF). Mirabait will have baited *only* in the unclipped parts of the sequences (which are thought to represent 'good, viable' sequence).\n");
  }

  // percentage of 'count' relative to all reads read, 0 if nothing was
  //  read at all (avoids printing "nan")
  auto percentOfReads=[](uint64 count) -> float {
    if(MB_numreadsread==0) return 0.0f;
    return 100.0f/MB_numreadsread*count;
  };

  cout << "\nBaiting process finished.\n\n";
  if(MB_baitpoolsize>0){
    cout << "Number of bait sequences:   " << MB_baitpoolsize << endl;
  }
  cout << "Total number of sequences read: " << MB_numreadsread << endl;
  // a pair accounts for two of the reads read
  cout << "Pairs baited: " << MB_numpairsbaited << " (" << fixed << setprecision(2) << percentOfReads(MB_numpairsbaited)*2 << "%)\n";
  cout << "Pairs missed: " << MB_numpairsmissed << " (" << fixed << setprecision(2) << percentOfReads(MB_numpairsmissed)*2 << "%)\n";
  cout << "Unpaired baited: " << MB_numunpairedbaited << " (" << fixed << setprecision(2) << percentOfReads(MB_numunpairedbaited) << "%)\n";
  cout << "Unpaired missed: " << MB_numunpairedmissed << " (" << fixed << setprecision(2) << percentOfReads(MB_numunpairedmissed) << "%)\n";

  FUNCEND();
  return 0;
}
