#include #include #include #include #include #include #include #include #include #include //for chdir #include #include #include #include #include //for transform #include "nr3.h" //for shape from "Numerical Recipes in C" W. H. Press et al #include "svd.h" //for shape //NEEDS C++11 #define NUMBINS 40 //R HISTO #define MAXDIST 40 //R HISTO #define NUMBINSRG 20 //RG HISTO set to MAXDISTRG (see explanation below under HISTO #define MAXDISTRG 20 //RG HISTO "standard one used for PEP_X #define NUMBINSETOE 40 //RG HISTO set to MAXDISTRG (see explanation below under HISTO #define MAXDISTETOE 40 //RG HISTO "standard one used for PEP_X //how many samps of input Ens to keep instead of use them all //#define NUMSAMPSUSED 10 #define WRITEINTERVAL 10 //for pdbs and RG_EXT #define RANGE 0.0//RN (-range/2,range/2) //if want to "fuzz out" phi/psi angles. #define DISCUT 2.4 //similar to min dis in MD FF - van der waals >3rd neighbor exclusion using namespace std; //program DihedToCC.cpp //PURPOSE: Convert dihed angles to CCs and evaluate properties. //SET CCs of atoms A,B,C and dihed angles, bond distances, angles //IN: Ens*.dat: Into array Phi[numSamp][numRes] and Psi[numSamp][numRes] //(numSamp samples of Phi and Psi diheds for numRes residues generated externally) //METHOD: init with CCs of A,B,C and: CD bond distances, BCD angles and some BC dihed. Return D CCs //Then, cp B,C,D CCs to A,B.C CCs, respectively. For read in BC dihed, CD bond dist, BCD angle //get D CCs. Also write to E the carbonyl oxygen. //Iterate this to form the chain CCs. //METHOD generate all atom distances for vdw clashes. //Use mapAt[m]=at+1 so written out atoms are 1-based for comparison to pdbs //METHOD then delete pdbs based on clash rejection. //N.B. skip O1-O2 and O1-N2 from vdw clash because init and read in dihPsi aren't "connected" //get RG (use same mass value since N,C,CA,O close enough) and EtoE //RG = sqrt (sum squares of pos (CC[at][i]) - COM)/sqrt(numAtoms) //EtoE dist of CA res first to last //N.B. bondAngle = (bondAngle-180); That means geometry A-B-C (in a row) //corresponds to input angle of 180 degrees. Then 90 degrees is B-C rotated up and 270 //degrees B-C rotated down relative to A-B. //init psi_0 and cycle over (omega1 phi_1 psi_1) (omega2 phi_2 psi_2).... //Get "HB" for alpha helix using res1(O)-res4(N) and for PPII res1(O)-res3(N). //Using atom 1 etc numbering //1-4: starts with atoms 4 - 13 (res1-res4) //1-3: starts with atoms 4 - 9 (res1-res3) //OUTFILES //OUT: cout has list of dihed angles used for each state and if they exist //distances less than disCut. //OUT: /DATA/*.pdb for each kept state //N.B. have to mv /DATA to something, else program won't work. //set range if want to "fuzz out" phi and psi angles //OUT: *_RG_EXT.txt RG delta and EtoE by kept sample //OUT: *_PofR.txt P(R) pdf of R (distance distribution) //OUT: *_PofRG.txt P(RG) pdf of RG distribution //OUT: *_PofEtoE.txt P(EtoE) pdf of EtoE distribution /* COUT: End of log Shape information - based on expressions in: Vitalis, A.; Wang, X. L.; Pappu, R. V., Quantitative Characterization of Intrinsic Disorder in Polyglutamine: Insights from Analysis Based on Polymer Theories. Biophys. J. 2007, 93, 1923-1937 Dima, R. I.; Thirumalai, D., Asymmetry in the Shapes of Folded and Denatured States of Proteins. J. Phys. Chem. B 2004, 108, 6564-6570 RGsq: Shapes[0]=RGsq delta: asphericity (ranges (0,1) Shapes[1] = 1.0-3.0*(eigVals[0]*eigVals[1]+eigVals[1]*eigVals[2]+eigVals[2]*eigVals[0]) /(Shapes[0]*Shapes[0]); S: shape parameter (ranges -1/4, 2) Shapes[2]=27.0*numerator/denom; double lambAve=Shapes[0]/3.0; double numerator= (eigVals[0]-lambAve)*(eigVals[1]-lambAve)*(eigVals[2]-lambAve); double denom = pow(Shapes[0],3); percent HB14 (alphaHelix) percent HB13 (PPII) */ void refoldNRF(double*, double*, double*, double*, double, double, double, double); void writeIt(char*, int, int, double*); //void fwriteIt(ofstream &, char*, int, int, double*); void fwriteIt(ofstream &, char*, int, int, double*, vector); double getdist(double*, double*); vector getPofR(double*, int, int, double); void getCovMatrix(double**, int, double**); //in CC and numAtoms; out covMatrix vector getEigvals(double**, int); vector getShapes(vector , int); vector getHistoRG(double, int, double); vector getHistoEtoE(double, int, double); double getJHNH(double); //for NMR string oneToThree(string); //convert fasta to three letter main() { string sdirInputs ="/path/analyticEnsOutDirectory"; string iFileName="Ensemble_nR_9_TCB.dat"; //name in the above dir chdir(sdirInputs.c_str()); system("echo -n '1. Data In Directory is '; pwd"); //put results in named dir string sdirOut ="/path/DihedToCCOutDirectory"; int numSamps; //read from IFILE int numRes; //read from, ILFILE int numDoub; //numRes-1 double range=RANGE; //for RN (-range/2,range/2) double disCut=DISCUT;//for vdw //for histo P(R) int numBins=NUMBINS; double maxDist=MAXDIST; double delBin =maxDist/numBins; double delBin2=delBin/2.0; vector cumeHisto(numBins); //to cume histo for P(R) //for histo of P(RG) int numBinsRG=NUMBINSRG; double maxDistRG=MAXDISTRG; double delBinRG =maxDistRG/numBinsRG; double delBin2RG=delBinRG/2.0; vector cumeHistoRG(numBinsRG); //to cume histo for p(RG) double cumeRG=0.0; //cume RG for write to cout int numBinsEtoE=NUMBINSETOE; double maxDistEtoE=MAXDISTETOE; double delBinEtoE =maxDistEtoE/numBinsEtoE; double delBin2EtoE=delBinEtoE/2.0; vector cumeHistoEtoE(numBinsEtoE); //to cume histo of EtoE int numShapes=3; //how many shape parameters to write vector cumeShapes(numShapes,0); int numSampsKept=0; //count how many samps kept (no overlap) //read Phi Psi from Ensemble generated by realization_dataBase.cpp ifstream iFile; //string iFileName="Ensemble_nR_9_TCB.dat"; iFile.open(iFileName.data(),ios::in); assert(iFile.is_open()); string drop; string sequenceName; string line; while (iFile >> std::ws && std::getline(iFile, line)); // skip empty lines and read last line numSamps=std::stoi(line); //total number samps in Ensemble for realz cout<<"there are "<>drop>>numRes>>drop>>sequenceName; numDoub=numRes-1; int lmax=numDoub*3; int numAtoms=4*(numDoub+1);//for CC 4 atoms * number of residues double **Phi; //Phi[numSamps][numRes] Phi = new double*[numSamps]; for(int ns=0;ns>Phi[ns][nd]; for(int nd=0;nd>Psi[ns][nd]; } iFile.close(); /* //debug in case want to check input properly read for(int ns=0;ns (&(ostringstream() <str(); string ofname=sequenceName+"_ENS_"+Sens+"K.pdb"; //OUTFILE all in one file ofstream ofile(ofname.data()); if(!ofile.is_open()){ cout< name; char tempCString[2]; //do a fasta string string str=sequenceName; for(std::string::iterator it = str.begin(); it != str.end(); ++it) { tempCString[0]=*it; //convert to C-string of length 1 string answ = oneToThree(tempCString); answ=answ.substr(0, 3); //pull out 3 letter code name.push_back(answ); } //constants set phi/psi to alpha helix omega to planar //for carbonyl O (CO) dihed use read dihPsi+180 double bndOmg=1.46; double bndPhi=1.51; double bndPsi=1.33; double angOmg=122; double angPhi=111; double angPsi=116; double dihOmg=180; double dihPhiBase=-60; double dihPsi=0; double bndCO=1.24; double angCACO=120.5; double dihNCACO=180+dihPsi; //place O double dihPhi; //atom CC vectors double *a, *b, *c, *d, *e; a = new double[3]; b = new double[3]; c = new double[3]; d = new double[3]; e = new double[3]; //e for write O //CCs stored for eliminate vdw clashes for(int at=0;at3) { mapAt[m]=at+1; mapAtp[m]=atp+1; dist = getdist(CC[at],CC[atp]); distArray[m]=dist; m++; } //write out short distances with their atom pairs m=0; int cntBadDist=0; // cout<<"for samp = "<0 && distArray[m] eigs; eigs = getEigvals(CC, numAtoms); vector Shapes(numShapes,0); Shapes = getShapes(eigs,numShapes); //cume the shapes transform(cumeShapes.begin(),cumeShapes.end(),Shapes.begin(),cumeShapes.begin(), plus()); double EtoE=0.0; int firstCA=1; //0-based indexing int lastCA=4*(numDoub+1)-1; //0-based indexing EtoE = getdist(CC[firstCA],CC[lastCA]); //CAs of first and last CA //cumulate HistoREtoEover samps vector HistoEtoE = getHistoEtoE(EtoE, numBinsEtoE, maxDistEtoE); for(int bn=0;bn Histo = getPofR(distArrayAll,numDist, numBins, maxDist); //cumulate Histo over samps for(int bn=0;bn HistoRG = getHistoRG(RGSq, numBinsRG, maxDistRG); for(int bn=0;bn) double RG_from_PofR=0.0; double normHisto=0.0; for(int bn=0;bn1e-10) ofileNMR< name) { ofile<<"ATOM "< getPofR(double* distArray, int numDist, int numBins, double maxDist){ //histo stuff double minDist=0.0; double delBin =maxDist/numBins; double delBin2=delBin/2.0; int hisBin; //local bn variable double value; int hisCount=0; vector Histo(numBins); for(int bn=0;bn=0 && hisBin getEigvals(double **CC,int numAtoms){ vector eigVals; //VecDoub eigVals(3); MatDoub covMatrix(3,3); for(int i=0;i<3;i++) for(int j=0;j<3;j++) covMatrix[i][j]=0.0; //get aves first VecDoub ave(3); for(int i=0;i<3;i++) ave[i]=0.0; for(int i=0;i<3;i++){ for(int at=0;at getShapes(vector eigVals,int numShapes){ vector Shapes(numShapes,0); double temp=0.0; for(int i=0;i<3;i++) temp += eigVals[i]; Shapes[0]=temp; //RGsq //delta: asphericity Shapes[1] = 1.0-3.0*(eigVals[0]*eigVals[1]+eigVals[1]*eigVals[2]+eigVals[2]*eigVals[0]) /(Shapes[0]*Shapes[0]); // S: shape parameter double lambAve=Shapes[0]/3.0; double numerator= (eigVals[0]-lambAve)*(eigVals[1]-lambAve)*(eigVals[2]-lambAve); double denom = pow(Shapes[0],3); Shapes[2]=27.0*numerator/denom; return Shapes; } vector getHistoRG(double RGsq, int nB, double maxD){ //histo stuff double minDist=0.0; double delBin =maxD/nB; double delBin2=delBin/2.0; int hisBin; //local bn variable double value; int hisCount=0; vector Histo(nB); for(int bn=0;bn=0 && hisBin getHistoEtoE(double EtoE, int nB, double maxD){ //histo stuff double minDist=0.0; double delBin =maxD/nB; double delBin2=delBin/2.0; int hisBin; //local bn variable double value; int hisCount=0; vector Histo(nB); for(int bn=0;bn=0 && hisBin