Reflection files format: Difference between revisions
(Created page with "CCP4 reflection files are binary files that are usually read by routines from CCP4 libraries. However, sometimes one wants to read them with a different programming language o...") |
(No difference)
|
Revision as of 15:04, 2 July 2019
CCP4 reflection files are binary files that are usually read by routines from CCP4 libraries. However, sometimes one wants to read them with a different programming language or without linking to the libraries, and it may be fun to work out alternative ways.
MTZ
A standalone Fortran program readmtz.f90 to read two MTZ files (both with at least one amplitude and one phase column), and to calculate the correlation coefficient of their electron densities, is this:
! gfortran -C -fbacktrace readmtz.f90 -o readmtz MODULE readmtz_mod ! minimal MTZ file reader - Kay Diederichs 11/2018 . According to ! http://www.ccp4.ac.uk/html/mtzformat.html which is not completely correct ! e.g. the reflection data do not start at byte 21=20+1, but at byte 81=4*20+1 TYPE data REAL :: ampli=-1. REAL :: phase END TYPE data CONTAINS SUBROUTINE readmtz(file,nref,refdat,colnames) IMPLICIT NONE CHARACTER, INTENT(IN) :: file*80 INTEGER, INTENT(OUT) :: nref REAL, ALLOCATABLE :: refdat(:,:) CHARACTER*30, ALLOCATABLE :: colnames(:) ! local variables INTEGER :: i,ncol,nbatch CHARACTER :: string*80,colnam(200)*30 nref=-1 OPEN(1,FILE=file,ACCESS='STREAM',ACTION='READ') ! is this BIG_ENDIAN or LITTLE_ENDIAN? we know where first hkl triple is IF (ALLOCATED(refdat)) DEALLOCATE(refdat) IF (ALLOCATED(colnames)) DEALLOCATE(colnames) ALLOCATE(refdat(3,1)) READ(1,POS=81) refdat IF (NINT(refdat(1,1))/=refdat(1,1).OR.NINT(refdat(2,1))/=refdat(2,1).OR.NINT(refdat(3,1))/=refdat(3,1)) THEN CLOSE(1) OPEN(1,FILE=string,ACCESS='STREAM',ACTION='READ',CONVERT='BIG_ENDIAN') PRINT *,'this file seems to be BIG_ENDIAN - was it written on SGI?' END IF DEALLOCATE(refdat) ! where is the header? READ(1,POS=5) i ! PRINT*,'header location:',i READ(1,POS=4*i-3) ! position before header ! read the header records, and print important ones to the screen i=0 DO READ(1)string IF (string(1:4)=='END ') EXIT ! skip history information and batch orientations IF (string(1:8)=='COLUMN H') WRITE(*,'(a)') & '# column name type min max set' IF (string(1:4)=='CELL'.OR.string(1:6)=='SYMINF') WRITE(*,'(a80)') string IF (string(1:6)=='COLUMN') THEN i=i+1 WRITE(*,'(i2,1x,a80)') i,string IF (i>200) STOP 'more than 200 columns? www.ccp4.ac.uk/html/mtzformat.html says 200!' colnam(i)=string(8:37) END IF IF (string(1:4)=='NCOL') THEN READ(string(5:),*) ncol,nref,nbatch PRINT*,'ncol, nref, nbatch=',ncol,nref,nbatch END IF END DO IF (nref==-1) STOP 'could not find NCOL record in header' IF (nbatch/=0) STOP 'nbatch must be 0' IF (i/=ncol) STOP 'column names and NCOL do not match' ALLOCATE(refdat(ncol,nref),colnames(ncol)) colnames=colnam(:ncol) READ(1,POS=81) ! position of reflection data DO i=1,nref READ(1) refdat(:,i) END DO CLOSE(1) END SUBROUTINE readmtz SUBROUTINE copy_given_cols(file,col_ampli,col_phase,refdat,colnames,data3d) CHARACTER, INTENT(IN) :: file*80,col_phase*30,col_ampli*30 REAL, ALLOCATABLE :: refdat(:,:) CHARACTER*30, ALLOCATABLE, INTENT(IN) :: colnames(:) TYPE(data), ALLOCATABLE :: data3d(:,:,:) INTEGER :: i,j,nref,h,k,l,hmin,kmin,lmin,hmax,kmax,lmax,icol_ampli,icol_phase icol_ampli=0 DO i=1,SIZE(colnames) IF (colnames(i)==col_ampli) THEN IF (icol_ampli==0) THEN icol_ampli=i ELSE STOP 'the amplitude column occured more than once!' END IF END IF END DO icol_phase=0 DO i=1,SIZE(colnames) IF (colnames(i)==col_phase) THEN IF (icol_phase==0) THEN icol_phase=i ELSE STOP 'the phase column occured more than once!' END IF END IF END DO WRITE(*,*)'indices of ampli ',TRIM(col_ampli),' and phase ',TRIM(col_phase),' columns:',icol_ampli,icol_phase IF (icol_ampli==0) STOP 'could not find ampli column' IF (icol_phase==0) STOP 'could not find phase column' nref=SIZE(refdat,DIM=2) hmin=MINVAL(NINT(refdat(1,:))) hmax=MAXVAL(NINT(refdat(1,:))) kmin=MINVAL(NINT(refdat(2,:))) kmax=MAXVAL(NINT(refdat(2,:))) lmin=MINVAL(NINT(refdat(3,:))) lmax=MAXVAL(NINT(refdat(3,:))) ALLOCATE(data3d(hmin:hmax,kmin:kmax,lmin:lmax)) j=0 DO i=1,nref h=NINT(refdat(1,i)) k=NINT(refdat(2,i)) l=NINT(refdat(3,i)) IF (isnan(refdat(icol_phase,i)).OR.isnan(refdat(icol_ampli,i))) CYCLE j=j+1 IF (refdat(icol_ampli,i)<0.) THEN PRINT*,i,refdat(icol_ampli,i) STOP 'error: amplitude <0 ?' END IF data3d(h,k,l)%phase=refdat(icol_phase,i) data3d(h,k,l)%ampli=refdat(icol_ampli,i) END DO PRINT*,'number with ampli=NaN or phase =NaN',nref-j PRINT*,'' END SUBROUTINE copy_given_cols END MODULE readmtz_mod PROGRAM main ! read two files, and calculate the correlation coefficient based on given columns USE readmtz_mod IMPLICIT NONE REAL, PARAMETER :: deg2rad=ATAN(1.)/45. REAL, ALLOCATABLE :: refdat(:,:) CHARACTER :: file*80,arg*80,col_phase*30,col_ampli*30 CHARACTER*30, ALLOCATABLE :: colnames(:) TYPE(data), ALLOCATABLE :: d1(:,:,:),d2(:,:,:) INTEGER :: i,j,nref,h,k,l,hmin,kmin,lmin,hmax,kmax,lmax REAL :: cc_zaehler,cc_nenner,delphi IF (COMMAND_ARGUMENT_COUNT()==0) STOP 'usage: readmtz file1 ampli_nam phase_nam file2 ampli_nam phase_nam' ! read first MTZ file CALL GET_COMMAND_ARGUMENT(1,file) WRITE(*,*) TRIM(file) CALL readmtz(file,nref,refdat,colnames) CALL GET_COMMAND_ARGUMENT(2,col_ampli) ! expects column name of amplitude CALL GET_COMMAND_ARGUMENT(3,col_phase) ! expects column name of phase CALL copy_given_cols(file,col_ampli,col_phase,refdat,colnames,d1) ! read second MTZ file CALL GET_COMMAND_ARGUMENT(4,file) WRITE(*,*) TRIM(file) CALL readmtz(file,nref,refdat,colnames) CALL GET_COMMAND_ARGUMENT(5,col_ampli) ! expects column name of amplitude CALL GET_COMMAND_ARGUMENT(6,col_phase) ! expects column name of phase CALL copy_given_cols(file,col_ampli,col_phase,refdat,colnames,d2) ! common array bounds hmin=MAX(LBOUND(d1,DIM=1),LBOUND(d2,DIM=1)) hmax=MIN(UBOUND(d1,DIM=1),UBOUND(d2,DIM=1)) kmin=MAX(LBOUND(d1,DIM=2),LBOUND(d2,DIM=2)) kmax=MIN(UBOUND(d1,DIM=2),UBOUND(d2,DIM=2)) lmin=MAX(LBOUND(d1,DIM=3),LBOUND(d2,DIM=3)) lmax=MIN(UBOUND(d1,DIM=3),UBOUND(d2,DIM=3)) ! do some calculation: cc_zaehler=0. cc_nenner=0. DO l=lmin,lmax DO k=kmin,kmax DO h=hmin,hmax IF (d1(h,k,l)%ampli<0.OR.d2(h,k,l)%ampli<0.) CYCLE delphi=ABS( d1(h,k,l)%phase - d2(h,k,l)%phase ) IF (delphi>180.) delphi=360.-delphi IF (ABS(delphi)>180.) STOP 'assert error delphi' cc_zaehler = cc_zaehler + d1(h,k,l)%ampli**2 * COS(delphi*deg2rad) cc_nenner = cc_nenner + d1(h,k,l)%ampli**2 END DO END DO END DO IF (cc_nenner>0.) cc_zaehler = cc_zaehler/cc_nenner PRINT*,'cc=',cc_zaehler END PROGRAM main
LCF
The CCP4 LCF (labelled column format) files were phased-out in the early 90's but some interesting data still lingers on half-inch tapes so, if they can be read, the LCF files can be converted to ASCII or other formats for safe-keeping, or further use. Jonathan Cooper has worked out how to read them, mainly using Python, and documents this.