#!/usr/bin/perl # # AMIGOS # Carlos M Duarte, Copyright 1998, 1999, 2003 # A program to measure standard and non-standard torsion angles for # nucleotides in standard PDB formatted files # Last modified 08/30/03; version 0.3 # $PDBSFX = ".pdb"; $pi = atan2(1,1) * 4; $PDBSPRD = "rna_sprd.txt"; $PDBAREA = "rna_area.txt"; open (ALLOUT, ">>all_sprd.txt")|| die "coupldn't open all_sprd.txt\n"; printf ALLOUT"ResNo\tResID\tType\tETA\tTHETA\tCHI\tALPHA\tBETA\tGAMMA\tDELTA\tEPSILON\tZETA\tNUTWO\tTYPEA\n"; open (ALLAREA, ">>all_area.txt")|| die "couldn't open all_area.txt\n"; printf ALLAREA"ResNo\tResID\tType\tETA\tTHETA\tCHI\tALPHA\tBETA\tGAMMA\tDELTA\tEPSILON\tZETA\tNUTWO\tTYPEA\n"; # # The section below will open each file in a directory and then # open output files in whatever directory you are in. There are 4 types # of output files # 1) a large single file of all nts from all files # 2) a large single file of all nts which fall within user spec'd range # 3&4) for each RNA PDB file files there are corresponding output files # of the above type # print STDOUT "What directory are your structures file(s) in?\n"; $dir = ; chop ($dir); print $dir; opendir (THISDIR, $dir); @allfiles = readdir THISDIR; foreach $FILE (@allfiles) { $_ = $FILE; if (/pdb$/ || /ent$/) { print "$_\n"; $PDB = $dir . "/" . $FILE; print SPRDOUT "$FILE\n"; $FILEAREA = $FILE."_area.txt"; open (AREAOUT, ">>$FILEAREA")|| die "coupldn't open $FILEAREA \n"; printf AREAOUT"ResID\tETA\tTHETA\n"; print AREAOUT "$FILE\n"; print ALLAREA "$FILE\n"; # # The subroutine call "&MEASURE" calls the subroutine that processes the file # It is followed by measurement file output sequences # &MEASURE($PDB); print AREAOUT"This area has $AREACOUNT out of $RESCOUNT residues\n"; $AREATOTAL += $AREACOUNT; $RESTOTAL += $RESCOUNT; $FILEOUT = $FILE."_sprd.txt"; open (OUTFILE, ">>$FILEOUT")|| die "couldn't open $FILEOUT \n"; printf OUTFILE"ResNo\tResID\tType\tETA\tTHETA\tCHI\tALPHA\tBETA\tGAMMA\tDELTA\tEPSILON\tZETA\tNUTWO\tTYPEA\n"; for ($i = 1; $i <= $NUMRES; $i++) { if ($CPCP[$i] != 0 && $PCPC[$i] != 0) { printf OUTFILE"%-5d\t%-7s\t%-4s\t%6.1f\t%6.1f\t%6.1f\t%6.1f\t%6.1f\t%6.1f\t%6.1f\t%6.1f%\t%6.1f\t%6.1f\t%3d\n", $i, $RESNAM[$i], $RESTYP[$i], $CPCP[$i], $PCPC[$i], $CHI[$i], $ALPHA[$i], $BETA[$i], $GAMMA[$i], $DELTA[$i], $EPSILON[$i], $ZETA[$i], $NUTWO[$i], $AHLX[$i]; printf ALLOUT"%-5d\t%-7s\t%-4s\t%6.1f\t%6.1f\t%6.1f\t%6.1f\t%6.1f\t%6.1f\t%6.1f\t%6.1f\t%6.1f%\t%6.1f\t%3d\n", $i, $RESNAM[$i], $RESTYP[$i], $CPCP[$i], $PCPC[$i], $CHI[$i], $ALPHA[$i], $BETA[$i], $GAMMA[$i], $DELTA[$i], $EPSILON[$i], $ZETA[$i], $NUTWO[$i], $AHLX[$i]; } } } } print ALLAREA"This area has $AREATOTAL out of $RESTOTAL residues\n"; # # Start of the processing subroutine # # File is parsed according to format specified by the PDB (see PDB website) # sub MEASURE { open (PDBFILE, $PDB)|| die "Couldn't open $PDB\n"; @ATOM = 0; @RNA = 0; $ATNUM = 0; $NUMRES = 0; $RESCOUNT = 0; $NUMCHN = 0; $AREACOUNT = 0; while () { last if (/^END/); if (/^ATOM/) { chop $_; $FOO = substr($_,12,4); $BAR = substr($_,21,6); if ($FOO eq $ATNAM && $BAR eq $ATRES[$ATNUM]) { $INFORM = substr($_,0,27); print "$INFORM skipped - duplicate atom model\n"; next; } $ATNUM++; $ATOM[$ATNUM] = $_; $ATNAM = substr($_, 12, 4); $ATRES[$ATNUM] = substr($_, 21, 5); $CHTM[$ATNUM] = substr($_, 21, 1); $ATRTP[$ATNUM] = substr($_, 17, 3); if ($ATRES[$ATNUM] ne $ATRES[$ATNUM-1]) { $NUMRES++; $RESCOUNT++; $RESNAM[$NUMRES] = $ATRES[$ATNUM]; $RESSTRT[$NUMRES] = $ATNUM; $RESTYP[$NUMRES] = $ATRTP[$ATNUM]; } # # The following section assigns each atom type to arrays that will later # be used to run measurements # # examples of acceptable ribose atom names: C4' -or- C4* # if ($ATNAM eq " P ") { $PIDX[$NUMRES] = $ATNUM; } if ($ATNAM eq " C4'" || $ATNAM eq " C4*") { $C4IDX[$NUMRES] = $ATNUM; } if ($ATNAM eq " O4'" || $ATNAM eq " O4*") { $O4IDX[$NUMRES] = $ATNUM; } if ($ATNAM eq " C1'" || $ATNAM eq " C1*") { $C1IDX[$NUMRES] = $ATNUM; } if ($ATNAM eq " N1 " || $ATNAM eq " N1 ") { $N1IDX[$NUMRES] = $ATNUM; } if ($ATNAM eq " N9 " || $ATNAM eq " N9 ") { $N9IDX[$NUMRES] = $ATNUM; } if ($ATNAM eq " C2 " || $ATNAM eq " C2 ") { $C2IDX[$NUMRES] = $ATNUM; } if ($ATNAM eq " C4 " || $ATNAM eq " C4 ") { $C42IDX[$NUMRES] = $ATNUM; } if ($ATNAM eq " O3'" || $ATNAM eq " O3*") { $O3IDX[$NUMRES] = $ATNUM; } if ($ATNAM eq " O5'" || $ATNAM eq " O5*") { $O5IDX[$NUMRES] = $ATNUM; } if ($ATNAM eq " C5'" || $ATNAM eq " C5*") { $C5IDX[$NUMRES] = $ATNUM; } if ($ATNAM eq " C3'" || $ATNAM eq " C3*") { $C3IDX[$NUMRES] = $ATNUM; } if ($ATNAM eq " C2'" || $ATNAM eq " C2*") { $C21IDX[$NUMRES] = $ATNUM; } if ($ATNAM eq " O2'" || $ATNAM eq " O2*") { $RNA[$NUMRES] = 1; } # # The chain designation is based on the PDB file. Each residue has a # corresponding residue number. If the first character is a letter # the start and end of a new chain are considered to be where the letter # changes (e.g. A15 followed by B1). The first and last nucleotide of a # chain are not measured # if ($NUMCHN == 0) { $NUMCHN++; $STRTATM[$NUMCHN] = $ATNUM; $CHAIN[$NUMCHN] = $CHTM; $STRTRES[$NUMCHN] = $NUMRES; } elsif ($CHTM[$ATNUM] ne $CHTM[$ATNUM-1]) { $NUMCHN++; $STRTATM[$NUMCHN] = $ATNUM; $ENDATM[$NUMCHN-1] = $ATNUM-1; $STRTRES[$NUMCHN] = $NUMRES; $ENDRES[$NUMCHN-1] = $NUMRES-1; $CHAIN[$NUMCHN] = $CHTM; } } } $ENDATM[$NUMCHN] = $ATNUM; $ENDRES[$NUMCHN] = $NUMRES; # # Following section does the actual parsing of atoms for each individual # nucleotide measurement # for ($i = 1; $i <= $NUMCHN; $i++) { print "chain number is $i\n"; $RESCOUNT -= 2; $TRES = $ENDRES[$i] - $STRTRES[$i] + 1; for ($j = 1; $j <= $TRES; $j++) { $NRES = $j + $STRTRES[$i] - 1; if ($j == 1 || $j == $TRES || $RNA[$NRES] == 0 || $RNA[$NRES-1] == 0 || $RNA[$NRES+1] == 0) { $ALPHA[$NRES] = $BETA[$NRES] = $GAMMA[$NRES] = $DELTA[$NRES] = $EPSILON[$NRES] = $ZETA[$NRES] = $PCPC[$NRES] = $CPCP[$NRES] = $PCPPLATE[$NRES] = $CPCPLATE[$NRES] = $NUTWO[$NRES] = 0; } else { $P1NAM = $PIDX[$NRES]; $C1NAM = $C4IDX[$NRES]; $O4NAM = $O4IDX[$NRES]; $CC1NAM = $C1IDX[$NRES]; $O5NAM = $O5IDX[$NRES]; $C5NAM = $C5IDX[$NRES]; $C3NAM = $C3IDX[$NRES]; $O3NAM = $O3IDX[$NRES]; $C21NAM = $C21IDX[$NRES]; @P1 = (substr($ATOM[$P1NAM],30,8),substr($ATOM[$P1NAM],38,8),substr($ATOM[$P1NAM],46,8)); @C1 = (substr($ATOM[$C1NAM],30,8),substr($ATOM[$C1NAM],38,8),substr($ATOM[$C1NAM],46,8)); @O4 = (substr($ATOM[$O4NAM],30,8),substr($ATOM[$O4NAM],38,8),substr($ATOM[$O4NAM],46,8)); @CC1 = (substr($ATOM[$CC1NAM],30,8),substr($ATOM[$CC1NAM],38,8),substr($ATOM[$CC1NAM],46,8)); @O5 = (substr($ATOM[$O5NAM],30,8),substr($ATOM[$O5NAM],38,8),substr($ATOM[$O5NAM],46,8)); @C5 = (substr($ATOM[$C5NAM],30,8),substr($ATOM[$C5NAM],38,8),substr($ATOM[$C5NAM],46,8)); @C3 = (substr($ATOM[$C3NAM],30,8),substr($ATOM[$C3NAM],38,8),substr($ATOM[$C3NAM],46,8)); @O3 = (substr($ATOM[$O3NAM],30,8),substr($ATOM[$O3NAM],38,8),substr($ATOM[$O3NAM],46,8)); @C21 = (substr($ATOM[$C21NAM],30,8),substr($ATOM[$C21NAM],38,8),substr($ATOM[$C21NAM],46,8)); &MEAS(@P1,@C1); $DISPC[$NRES] = $DIST; &DIHED(@P1, @O5, @C5, @C1, 1); $BETA[$NRES] = $TOR; &DIHED(@O5, @C5, @C1, @C3, 2); $GAMMA[$NRES] = $TOR; &DIHED(@C5, @C1, @C3, @O3, 3); $DELTA[$NRES] = $TOR; &DIHED(@CC1, @C21, @C3, @C1, 3); $NUTWO[$NRES] = $TOR; if ($RESTYP[$NRES] eq " G" || $RESTYP[$NRES] eq " A"){ $N9NAM = $N9IDX[$NRES]; $CC4NAM = $C42IDX[$NRES]; @N9 = (substr($ATOM[$N9NAM],30,8),substr($ATOM[$N9NAM],38,8),substr($ATOM[$N9NAM],46,8)); @CC4 = (substr($ATOM[$CC4NAM],30,8),substr($ATOM[$CC4NAM],38,8),substr($ATOM[$CC4NAM],46,8)); &DIHED(@O4, @CC1, @N9, @CC4, 4); $CHI[$NRES] = $TOR; } elsif ($RESTYP[$NRES] eq " U" || $RESTYP[$NRES] eq " C" || $RESTYP[$NRES] eq " T"){ $N1NAM = $N1IDX[$NRES]; $CC2NAM = $C2IDX[$NRES]; @N1 = (substr($ATOM[$N1NAM],30,8),substr($ATOM[$N1NAM],38,8),substr($ATOM[$N1NAM],46,8)); @CC2 = (substr($ATOM[$CC2NAM],30,8),substr($ATOM[$CC2NAM],38,8),substr($ATOM[$CC2NAM],46,8)); &DIHED(@O4, @CC1, @N1, @CC2, 5); $CHI[$NRES] = $TOR; } else { print "you have a freak ribonucleotide, position $NRES, type $RESTYP[$NRES]\n"; } $P2NAM = $PIDX[$NRES+1]; $C2NAM = $C4IDX[$NRES+1]; $O52NAM = $O5IDX[$NRES+1]; $P0NAM = $PIDX[$NRES - 1]; @P2 = (substr($ATOM[$P2NAM],30,8),substr($ATOM[$P2NAM],38,8),substr($ATOM[$P2NAM],46,8)); @C2 = (substr($ATOM[$C2NAM],30,8),substr($ATOM[$C2NAM],38,8),substr($ATOM[$C2NAM],46,8)); @O52 = (substr($ATOM[$O52NAM],30,8),substr($ATOM[$O52NAM],38,8),substr($ATOM[$O52NAM],46,8)); @P0 = (substr($ATOM[$P0NAM],30,8),substr($ATOM[$P0NAM],38,8),substr($ATOM[$P0NAM],46,8)); &MEAS(@C1,@P2); $DISCP[$NRES] = $DIST; &ANG(@P1,@C1,@P2); $CANG[$NRES] = $THETA; &DIHED(@C3,@O3,@P2,@O52, 6); $ZETA[$NRES] = $TOR; &DIHED(@P1,@C1,@P2,@C2, 7); $PCPC[$NRES] = $TOR; &DIHED(@C1,@C3,@O3,@P2, 8); $EPSILON[$NRES] = $TOR; $C0NAM = $C4IDX[$NRES - 1]; $O30NAM = $O3IDX[$NRES - 1]; @C0 = (substr($ATOM[$C0NAM],30,8),substr($ATOM[$C0NAM],38,8),substr($ATOM[$C0NAM],46,8)); @O30 = (substr($ATOM[$O30NAM],30,8),substr($ATOM[$O30NAM],38,8),substr($ATOM[$O30NAM],46,8)); &ANG(@C0,@P1,@C1); $PANG[$NRES] = $THETA; &DIHED(@C0,@P1,@C1,@P2, 9); $CPCP[$NRES] = $TOR; &DIHED(@O30,@P1,@O5,@C5, 10); $ALPHA[$NRES] = $TOR; } } } # # At this point the measurements for the file have been processed # The following sections check to see if nucleotides within this file # fall within user specified ranges # # Call routine TYPEA # for ($i = 1; $i <= $NUMRES; $i++) { &TYPEA($ALPHA[$i], $BETA[$i], $GAMMA[$i], $DELTA[$i], $EPSILON[$i], $ZETA[$i]); if ($TYPA == 1) { $AHLX[$i] = 1; $GOOD++; } else { $AHLX[$i] = 0; $BAD++; } # # Call routine AREA # &AREA($CPCP[$i], $PCPC[$i]); if ($i == $ENDRES[$#ENDRES]) { $end = $ATNUM; } else { $end = $RESSTRT[$i+1] - 1; } } } # # A subroutine to measure distances between atoms # sub MEAS { local (@HERE) = @_; $X2 = ($HERE[0] - $HERE[3])**2; $Y2 = ($HERE[1] - $HERE[4])**2; $Z2 = ($HERE[2] - $HERE[5])**2; $DIST = ($X2 + $Y2 + $Z2)**(1/2); } # # A subroutine to measure the angle between three atoms # sub ANG { local (@NOW) = @_; local ($SIN, $COS); foreach $foo (@NOW) { if (/!\d/) { $THETA = 400; print "$foo in sub ANG, res $NRES of $FILE is not a number\n"; return; } } $AX = ($NOW[0] - $NOW[3]); $BX = ($NOW[6] - $NOW[3]); $AY = ($NOW[1] - $NOW[4]); $BY = ($NOW[7] - $NOW[4]); $AZ = ($NOW[2] - $NOW[5]); $BZ = ($NOW[8] - $NOW[5]); $COS = (($AX * $BX) + ($AY * $BY) + ($AZ * $BZ))/ ((($AX**2 + $AY**2 + $AZ**2)*($BX**2 + $BY**2 +$BZ**2))**(1/2)); $SIN = (1 - ($COS**2))**(1/2); $THETA = atan2($SIN, $COS)*(180/$pi); } # # A subroutine to measure the torsion defined by four atoms # sub DIHED { local (@DO) = @_; local ($SIN, $COS, $NUCOS); foreach $foo (@DO) { if (/!\d/ || $foo == 0) { $TOR = 400; } } $PX = ($DO[0] - $DO[3]); $PY = ($DO[1] - $DO[4]); $PZ = ($DO[2] - $DO[5]); $QX = ($DO[6] - $DO[3]); $QY = ($DO[7] - $DO[4]); $QZ = ($DO[8] - $DO[5]); $RX = ($DO[9] - $DO[3]); $RY = ($DO[10] - $DO[4]); $RZ = ($DO[11] - $DO[5]); $LNX = ($QY * $RZ) - ($QZ * $RY); $LNY = ($QZ * $RX) - ($QX * $RZ); $LNZ = ($QX * $RY) - ($QY * $RX); $MNX = ($QY * $PZ) - ($QZ * $PY); $MNY = ($QZ * $PX) - ($QX * $PZ); $MNZ = ($QX * $PY) - ($QY * $PX); $COS = (($LNX*$MNX)+($LNY*$MNY)+($LNZ*$MNZ))/((($LNX**2+$LNY**2+$LNZ**2)*($MNX**2+$MNY**2+$MNZ**2))**(1/2)); $SIN = (1 - ($COS**2))**(1/2); $NUCOS = (($MNX*$RX)+($MNY*$RY)+($MNZ*$RZ))/((($MNX**2+$MNY**2+$MNZ**2)*($RX**2+$RY**2+$RZ**2))**(1/2)); $TOR = (atan2($SIN, $COS)*(180/$pi))*($NUCOS/(($NUCOS**2)**(1/2))); if ($TOR < 0){ $TOR = 360 + $TOR } } # # Subroutine to determine if nucleotides fall within user-specified ranges # of eta and theta. Nucleotides which do are added to the files # all_area.txt AND xxx.pdb_area.txt # sub AREA { local (@MSMT) = @_; @UPLIM = ( 190, # change to high limit of eta range 260 # change to high limit of theta range ); @LOWLIM = ( 150, # change to lower limit of eta range 190 # change to lower limit of theta range ); for ($z=0; $z <= $#MSMT; $z++) { if (($MSMT[$z] == 0) && ($MSMT[$z+1] == 0)) { return; } # # The code now finds nucleotides outside of the specified region # To change it to find nucleotides inside the specified region comment # the next executable line and remove the comment (#) from the next line # if (($MSMT[$z] < $LOWLIM[$z}) || ($MSMT[$z] > $UPLIM[$z])) { # if (($MSMT[$z] > $LOWLIM[$z]) && ($MSMT[$z] < $UPLIM[$z])) { return; } } printf AREAOUT "%-7s\t%6.1f\t%6.1f\n", $RESNAM[$i], $MSMT[0], $MSMT[1]; printf ALLAREA"%-5d\t%-7s\t%-4s\t%6.1f\t%6.1f\t%6.1f\t%6.1f\t%6.1f\t%6.1f\t%6.1f\t%6.1f\t%6.1f%\t%6.1f\t%3d\n", $i, $RESNAM[$i], $RESTYP[$i], $CPCP[$i], $PCPC[$i], $CHI[$i], $ALPHA[$i], $BETA[$i], $GAMMA[$i], $DELTA[$i], $EPSILON[$i], $ZETA[$i], $NUTWO[$i], $AHLX[$i]; $AREACOUNT++; } # # Subroutine to determine if nucleotides fall within user-specified ranges # of the standard torsions. Nucleotides which do are noted with a "1" # in the TYPEA column of output files all_sprd.txt and xxx.pdb_sprd.txt # sub TYPEA { $TYPA = 0; local (@MSMT) = @_; @UPLIM = ( 360, # change to high limit for alpha 210, # change to high limit for beta 360, # change to high limit for gamma 100, # change to high limit for delta 225, # change to high limit for epsilon 340 # change to high limit for zeta ); @LOWLIM = ( 155, # change to low limit for alpha 149, # change to low limit for beta 0, # change to low limit for gamma 65, # change to low limit for delta 155, # change to low limit for epsilon 250 # change to low limit for zeta ); $TYPA = 1; }