#include #include #include #include #include #include #include #include #include #include "math.h" #include #include #include #include //timer stuff #include #include #include //for map container //NOW NEED g++11 #include //sort from stl #include //for accumulate #define NUMVECS int(1e8) //how many vectors to create //#define NUMVECS int(1e9) //how many vectors to create #define DENOM 10 //for ensemble construction. ratio of max to smallest prob /* DISCLAIMER: THIS SOFTWARE IS MADE FREELY AVAILABLE. NO GUARANTEES ARE MADE REGARDING ITS CORRECTNESS OR UTILITY. PURPOSE: GENERATE AN ENSEMBLE OF PEPTIDE/PROTEIN BACKBONE CONFORMATIONS. FOR USE OF THIS ENSEMBLE TO EVALUATE VARIOUS OBSERVABLES OF THE ENSEMBLE. (see program dihedtocc.cpp) IN: condProbKmeans_TCB_ALL.dat holds the pair conditional database. see OUTS below. SEE README.TXT FOR HELP AND AN EXAMPLE. IF USED CITE XXXX */ /* Need g++11 for map program analyticEns.cpp N.B. DUNBRACK prob1First[s]=1.0; //DUNBRACK - no info on first residue PURPOSE: Sample states of a Markov chain with probabilities W(x1,x2,....xn)=P(xn|xn-1)..P(x2|x1)W(x1) For given sequence, create the chain states and probabilities For each fixed s1 value generate a state with prob = product of condProb(each doublet) * prob(1) in proportion to the P(s2|s1) values in the condProbKmeans_TCB_ALL.dat table. Note: Sum s2 P(s2|s1)=1 so normed already for each s1. PARAMETERS: see begin of main DENOM ratio of highest to lowest accepted probability INPUT: (this dir) read condProbs DUNBRACK from condProbKmeans_ALL.dat file Line 1 XY Line 2 condProbKmeans[k] k in K centroid[k] k in K (phi/psi each k) Line 3 ... Line K... METHOD construct [doublets][probs] for input sequence construct corresponding [doublets][numClus] for the sequence The superstates in condProbKmeans_ALL.txt are ordered with s2 varying first DUNBRACK s1 =1 always. (see N.B. below) generate 2^nD (nD =1,2, ..numDihed) succesive states using Each round, first cp the vecs from previous round then push_back possible rot values from condProb and evaluate prodProb. Descend sort (state, prodProb) on prodProb and delete if too small. DENOM controls relative prodProb to maxProb If accepted keep on list of currrent states. if prob too small drop from next round construction. Do until sequences are generated along with their probs. OUTS: cout: to a check file for number sequence states as constructed in chain. Possible number of sequence states and number constructed based on DENOM. Total kept probability. mapPhiPsi.dat: the state phi psi correspondence from read data StatesAndProbsDescend_*.dat unique states and their probs in descend order. Ensemble_*dat The ensemble of phi/psi dihedrals such that there are e.g. 9 ( for DENOM 10) for the highest prob down to e.g. 1 for the lowest accepted prob. */ /* N.B. for conditionals SUPERSTATE (SS) MATCH TO STATES S1 S2 SS S1 S2 0 0 0 1 0 1 2 1 0 3 1 1 4 2 0 5 2 1 where outer loop over Z1, inner loop over Z2 to match ss N.B. KEY POINT - don't need vecs vecs1 run 0,1,..numVecs1 (same for 2) those integers are equally well covered by loops over s1 and s2. ss=numStates2*s1+IndexStates2[s2]; (with ss++) goes over the above superstates. */ using namespace std; int main() { //dirs //put results in named dir string sdirResults ="/path/analyticEnsOutDirectory"; const int numDataBase=420; //the number of entries in condProbKmeans_ALL.dat (exclude "X_*") //sequence to be simulated const string resName ("EGAAWAASS"); //fasta sequence //PEP_W int numRes=static_cast(resName.length() ); const int numDoub=numRes-1; int denom = DENOM; //for ensemble construction int numVecs = NUMVECS; //for fileNames string extnR=static_cast( &(ostringstream() << numRes) )->str(); //for dataBase read const int numSupStatesOver=20; //overdim to use for variable numSupStates in nr loop double **probCondSeq; //[numDoub][numStates] probCondSeq=new double*[numDoub]; for (int nd=0;nd>doubNameData>>numStates2>>numStates1; double probTemp[numStates2], phiTemp[numStates2], psiTemp[numStates2]; int numSupStatesTemp=numStates2*numStates1; for (int s=0;s>probTemp[s]>>phiTemp[s]>>psiTemp[s]; if(doubName==doubNameData){ ns1[nd]=numStates1; ns2[nd]=numStates2; numSupStates[nd]=numSupStatesTemp; for (int s=0;s temp(numSupStates[nd]); for (int s=0;s::iterator result; std::vector::iterator result_min; result = std::max_element(temp.begin(), temp.end()); result_min = std::min_element(temp.begin(), temp.end()); double maxVal=temp[std::distance(temp.begin(), result)]; double minVal=temp[std::distance(temp.begin(), result_min)]; maxProdCondProb *= maxVal; minProdCondProb *= minVal; } double probCutOff=maxProdCondProb/denom; //Keep e.g. first decade of probs cout<<"maxProdCondProb="< > Vecs(numVecs); for(int i=0;i row; Vecs.push_back(row); } //set base vecs with one vector of size 1 and prob 1 vTop++; for (int nv=0;nvnumVecs-100){ cout<<"for nD+1="<>> A; //for sort of vectors by prodCondProb for(int nv=0;nv