#include "nullModelClass.h"

/**
 * Default constructor: creates an empty null model with no estimates loaded.
 *
 * `status` is set to false (the file-loading constructor sets it to true once
 * the estimates have been read). The scalar members are zeroed so that
 * getOrder() never returns an indeterminate value on a default-constructed
 * object.
 */
nullModelClass_t::nullModelClass_t() {
    status = false;
    order = 0;
    alphabetSize = 0;
}
/**
 * Builds the null model from the estimates file on disk.
 *
 * The file is expected to hold one entry per line in a two-column,
 * whitespace-separated format:
 *   - first column:  a string containing the context followed by the letter
 *                    whose null-model probability is given;
 *   - second column: that probability.
 *
 * After loading, the constructor infers the alphabet from the context
 * strings, checks that all contexts have the same length, derives the Markov
 * order (context length - 1), requires the model to be zeroth order, checks
 * that the table holds one entry per possible context, and normalizes the
 * probabilities. Any violation terminates the program.
 *
 * @param param_nullMarkovOrder  Requested order as a string. Currently unused:
 *        the order-specific file names are commented out below in favour of a
 *        single hard-coded estimates file.
 */
nullModelClass_t::nullModelClass_t(string param_nullMarkovOrder) {
  (void)param_nullMarkovOrder;  // see note in the header comment
  //string fname = "./data/protSeqNullMMLEstimatesOrder" + param_nullMarkovOrder + ".txt";
  //string fname = "./data/UniProtHSNullMMLEstimatesOrder" + param_nullMarkovOrder + ".txt";
  string fname = "data/MML0_estimates_uniprot-all.fasta";

  /* Checked at run time (not with assert) so that a missing file is still
   * reported when the program is built with NDEBUG. */
  ifstream infile(fname.c_str(), ios::in);
  if (!infile) {
      cerr << "Error: cannot open null model file " << fname << ".\n";
      exit(1);
  }

  /* Read line by line into a std::string: no fixed-size buffer to overflow,
   * and a final line without a trailing newline is still processed. */
  string line;
  size_t lineNo = 0;
  while (getline(infile, line)) {
      ++lineNo;
      stringstream ss(line);
      string c;
      if (!(ss >> c)) continue;  // blank / whitespace-only line: nothing to store
      double p = 0.0;
      if (!(ss >> p)) {
          cerr << "Error: no probability found for context " << c
              << " on line " << lineNo << " of " << fname << ".\n";
          exit(1);
      }
      // insert() keeps the first entry if a context appears more than once
      ctx2pr.insert(pair<string,double>(c,p));
  }
  infile.close();

  if (ctx2pr.empty()) {
      cerr << "Error: null model file " << fname << " holds no estimates.\n";
      exit(1);
  }

  /* Infer alphabet from context strings, and at the same time */
  /* ensure that all context strings are of the same length.   */
  /* Contexts are never empty here (blank lines were skipped), */
  /* so ctxlen >= 1.                                           */
  map<string,double>::iterator it = ctx2pr.begin();
  const size_t ctxlen = it->first.length();
  alphabet = "";
  alphabetSize = 0;
  for (it = ctx2pr.begin(); it != ctx2pr.end(); ++it) {
    const string &ctx = it->first;
    if (ctx.length() != ctxlen) {
        cerr << "Found a context " << ctx << " of deviating length.\n";
        cerr << "All contexts should be of the same length. Terminating!\n";
        exit(1);
    }
    for (size_t i = 0; i < ctx.length(); ++i) {
        map<char,int>::iterator aait = alphabetMap.find(ctx[i]);
        if (aait == alphabetMap.end()) {
            // first sighting of this symbol: extend the alphabet
            alphabet += ctx[i];
            alphabetSize++;
            alphabetMap.insert(pair<char,int>(ctx[i],(int)1));
        }
        else {
            // already known: just count the occurrence
            (aait->second)++;
        }
    }
  }
  const size_t nctxs = ctx2pr.size();

  /* set order of the null markov model */
  order = ctxlen-1;
  status = true;

  //for seqmmligner work
  if (order !=0) {
      cerr << "Error: null model is not zeroth order.\n"
          << " Cannot continue\n";
      exit(1);
  }

  /* ensure that the file contains all probabilities needed
   * for the given alphabet: alphabetSize^(order+1) contexts.
   * Computed with integer arithmetic to avoid floating-point
   * rounding when converting the result of pow() to an integer. */
  size_t tot = 1;
  for (size_t k = 0; k < ctxlen; ++k) {
      tot *= static_cast<size_t>(alphabetSize);
  }
  if (tot != nctxs) {
      cout << alphabetSize << " " << order << " " << tot << " " << nctxs << endl;
      cerr << "Error: null model file " << fname
          << " does not hold one probability per context.\n";
      exit(1);
  }

  normalize();
}

//ensure all probabilities add up to 1;
void nullModelClass_t::normalize() {
  assert(order == 0);
  map<string,double>::iterator it = ctx2pr.begin();
  double sum = 0.0;
  for (it=ctx2pr.begin(); it != ctx2pr.end(); it++) {
    sum += it->second;
  }
  //normalize
  for (it=ctx2pr.begin(); it != ctx2pr.end(); it++) {
    it->second /= sum;
  }
}


/**
 * Returns the stored null-model probability for `ctx`.
 *
 * @param ctx  Context string; its length must be order + 1 (asserted) and it
 *             must already be present in the table.
 * @return     The probability stored for `ctx`.
 *
 * The value is read through the iterator returned by find(), so the table is
 * looked up once and is never modified. An unknown context trips the assert
 * in debug builds; with NDEBUG it is reported and the program terminates
 * rather than a zero entry being silently inserted by operator[].
 */
double nullModelClass_t::pr(string ctx) {
    assert(ctx.length()-1 == order);
    map<string,double>::iterator it = ctx2pr.find(ctx);
    assert(it != ctx2pr.end());
    if (it == ctx2pr.end()) {
        cerr << "Error: no null model probability for context " << ctx << ".\n";
        exit(1);
    }
    return it->second;
}

/**
 * Overwrites the stored null-model probability of an existing context.
 *
 * @param ctx  Context string; its length must be order + 1 (asserted) and it
 *             must already be present in the table.
 * @param pr   New probability value to store for `ctx`. The table is not
 *             re-normalized here.
 *
 * The value is written through the iterator returned by find(), so the table
 * is looked up once. An unknown context trips the assert in debug builds;
 * with NDEBUG it is reported and the program terminates rather than a new
 * context being silently added by operator[].
 */
void nullModelClass_t::set_pr(string ctx, double pr) {
    assert(ctx.length()-1 == order);
    map<string,double>::iterator it = ctx2pr.find(ctx);
    assert(it != ctx2pr.end());
    if (it == ctx2pr.end()) {
        cerr << "Error: cannot set null model probability for unknown context "
            << ctx << ".\n";
        exit(1);
    }
    it->second = pr;
}

size_t nullModelClass_t::getOrder() {
    return order;
}

/** Returns a copy of the alphabet string (one character per distinct symbol). */
string nullModelClass_t::getAlphabet() {
    return this->alphabet;
}

int nullModelClass_t::getAlphabetSize() {
    return alphabet.length();
}

/**
 * Tells whether every symbol of `otherAlphabet` is known to this null model.
 *
 * @param otherAlphabet  Symbols to check against the model's alphabet map.
 * @return true if each character is a key of `alphabetMap` (also true for an
 *         empty string), false as soon as an unknown character is found.
 */
bool nullModelClass_t::isCompatible(string otherAlphabet) {
    const string::size_type symbolCount = otherAlphabet.length();
    for (string::size_type pos = 0; pos < symbolCount; ++pos) {
        const char symbol = otherAlphabet[pos];
        if (alphabetMap.count(symbol) == 0) {
            return false;
        }
    }
    return true;
}
