Reflection files format: Difference between revisions
Created page with "CCP4 reflection files are binary files that are usually read by routines from CCP4 libraries. However, sometimes one wants to read them with a different programming language o..." |
(No difference)
|
Revision as of 15:04, 2 July 2019
CCP4 reflection files are binary files that are usually read by routines from CCP4 libraries. However, sometimes one wants to read them with a different programming language or without linking to the libraries, and it may be fun to work out alternative ways.
MTZ
A standalone Fortran program readmtz.f90 to read two MTZ files (both with at least one amplitude and one phase column), and to calculate the correlation coefficient of their electron densities, is this:
! gfortran -C -fbacktrace readmtz.f90 -o readmtz
MODULE readmtz_mod
! minimal MTZ file reader - Kay Diederichs 11/2018 . According to
! http://www.ccp4.ac.uk/html/mtzformat.html which is not completely correct
! e.g. the reflection data do not start at byte 21=20+1, but at byte 81=4*20+1
TYPE data
REAL :: ampli=-1.
REAL :: phase
END TYPE data
CONTAINS
SUBROUTINE readmtz(file,nref,refdat,colnames)
IMPLICIT NONE
CHARACTER, INTENT(IN) :: file*80
INTEGER, INTENT(OUT) :: nref
REAL, ALLOCATABLE :: refdat(:,:)
CHARACTER*30, ALLOCATABLE :: colnames(:)
! local variables
INTEGER :: i,ncol,nbatch
CHARACTER :: string*80,colnam(200)*30
nref=-1
OPEN(1,FILE=file,ACCESS='STREAM',ACTION='READ')
! is this BIG_ENDIAN or LITTLE_ENDIAN? we know where first hkl triple is
IF (ALLOCATED(refdat)) DEALLOCATE(refdat)
IF (ALLOCATED(colnames)) DEALLOCATE(colnames)
ALLOCATE(refdat(3,1))
READ(1,POS=81) refdat
IF (NINT(refdat(1,1))/=refdat(1,1).OR.NINT(refdat(2,1))/=refdat(2,1).OR.NINT(refdat(3,1))/=refdat(3,1)) THEN
CLOSE(1)
OPEN(1,FILE=string,ACCESS='STREAM',ACTION='READ',CONVERT='BIG_ENDIAN')
PRINT *,'this file seems to be BIG_ENDIAN - was it written on SGI?'
END IF
DEALLOCATE(refdat)
! where is the header?
READ(1,POS=5) i ! PRINT*,'header location:',i
READ(1,POS=4*i-3) ! position before header
! read the header records, and print important ones to the screen
i=0
DO
READ(1)string
IF (string(1:4)=='END ') EXIT ! skip history information and batch orientations
IF (string(1:8)=='COLUMN H') WRITE(*,'(a)') &
'# column name type min max set'
IF (string(1:4)=='CELL'.OR.string(1:6)=='SYMINF') WRITE(*,'(a80)') string
IF (string(1:6)=='COLUMN') THEN
i=i+1
WRITE(*,'(i2,1x,a80)') i,string
IF (i>200) STOP 'more than 200 columns? www.ccp4.ac.uk/html/mtzformat.html says 200!'
colnam(i)=string(8:37)
END IF
IF (string(1:4)=='NCOL') THEN
READ(string(5:),*) ncol,nref,nbatch
PRINT*,'ncol, nref, nbatch=',ncol,nref,nbatch
END IF
END DO
IF (nref==-1) STOP 'could not find NCOL record in header'
IF (nbatch/=0) STOP 'nbatch must be 0'
IF (i/=ncol) STOP 'column names and NCOL do not match'
ALLOCATE(refdat(ncol,nref),colnames(ncol))
colnames=colnam(:ncol)
READ(1,POS=81) ! position of reflection data
DO i=1,nref
READ(1) refdat(:,i)
END DO
CLOSE(1)
END SUBROUTINE readmtz
SUBROUTINE copy_given_cols(file,col_ampli,col_phase,refdat,colnames,data3d)
CHARACTER, INTENT(IN) :: file*80,col_phase*30,col_ampli*30
REAL, ALLOCATABLE :: refdat(:,:)
CHARACTER*30, ALLOCATABLE, INTENT(IN) :: colnames(:)
TYPE(data), ALLOCATABLE :: data3d(:,:,:)
INTEGER :: i,j,nref,h,k,l,hmin,kmin,lmin,hmax,kmax,lmax,icol_ampli,icol_phase
icol_ampli=0
DO i=1,SIZE(colnames)
IF (colnames(i)==col_ampli) THEN
IF (icol_ampli==0) THEN
icol_ampli=i
ELSE
STOP 'the amplitude column occured more than once!'
END IF
END IF
END DO
icol_phase=0
DO i=1,SIZE(colnames)
IF (colnames(i)==col_phase) THEN
IF (icol_phase==0) THEN
icol_phase=i
ELSE
STOP 'the phase column occured more than once!'
END IF
END IF
END DO
WRITE(*,*)'indices of ampli ',TRIM(col_ampli),' and phase ',TRIM(col_phase),' columns:',icol_ampli,icol_phase
IF (icol_ampli==0) STOP 'could not find ampli column'
IF (icol_phase==0) STOP 'could not find phase column'
nref=SIZE(refdat,DIM=2)
hmin=MINVAL(NINT(refdat(1,:)))
hmax=MAXVAL(NINT(refdat(1,:)))
kmin=MINVAL(NINT(refdat(2,:)))
kmax=MAXVAL(NINT(refdat(2,:)))
lmin=MINVAL(NINT(refdat(3,:)))
lmax=MAXVAL(NINT(refdat(3,:)))
ALLOCATE(data3d(hmin:hmax,kmin:kmax,lmin:lmax))
j=0
DO i=1,nref
h=NINT(refdat(1,i))
k=NINT(refdat(2,i))
l=NINT(refdat(3,i))
IF (isnan(refdat(icol_phase,i)).OR.isnan(refdat(icol_ampli,i))) CYCLE
j=j+1
IF (refdat(icol_ampli,i)<0.) THEN
PRINT*,i,refdat(icol_ampli,i)
STOP 'error: amplitude <0 ?'
END IF
data3d(h,k,l)%phase=refdat(icol_phase,i)
data3d(h,k,l)%ampli=refdat(icol_ampli,i)
END DO
PRINT*,'number with ampli=NaN or phase =NaN',nref-j
PRINT*,''
END SUBROUTINE copy_given_cols
END MODULE readmtz_mod
PROGRAM main
! read two files, and calculate the correlation coefficient based on given columns
USE readmtz_mod
IMPLICIT NONE
REAL, PARAMETER :: deg2rad=ATAN(1.)/45.
REAL, ALLOCATABLE :: refdat(:,:)
CHARACTER :: file*80,arg*80,col_phase*30,col_ampli*30
CHARACTER*30, ALLOCATABLE :: colnames(:)
TYPE(data), ALLOCATABLE :: d1(:,:,:),d2(:,:,:)
INTEGER :: i,j,nref,h,k,l,hmin,kmin,lmin,hmax,kmax,lmax
REAL :: cc_zaehler,cc_nenner,delphi
IF (COMMAND_ARGUMENT_COUNT()==0) STOP 'usage: readmtz file1 ampli_nam phase_nam file2 ampli_nam phase_nam'
! read first MTZ file
CALL GET_COMMAND_ARGUMENT(1,file)
WRITE(*,*) TRIM(file)
CALL readmtz(file,nref,refdat,colnames)
CALL GET_COMMAND_ARGUMENT(2,col_ampli) ! expects column name of amplitude
CALL GET_COMMAND_ARGUMENT(3,col_phase) ! expects column name of phase
CALL copy_given_cols(file,col_ampli,col_phase,refdat,colnames,d1)
! read second MTZ file
CALL GET_COMMAND_ARGUMENT(4,file)
WRITE(*,*) TRIM(file)
CALL readmtz(file,nref,refdat,colnames)
CALL GET_COMMAND_ARGUMENT(5,col_ampli) ! expects column name of amplitude
CALL GET_COMMAND_ARGUMENT(6,col_phase) ! expects column name of phase
CALL copy_given_cols(file,col_ampli,col_phase,refdat,colnames,d2)
! common array bounds
hmin=MAX(LBOUND(d1,DIM=1),LBOUND(d2,DIM=1))
hmax=MIN(UBOUND(d1,DIM=1),UBOUND(d2,DIM=1))
kmin=MAX(LBOUND(d1,DIM=2),LBOUND(d2,DIM=2))
kmax=MIN(UBOUND(d1,DIM=2),UBOUND(d2,DIM=2))
lmin=MAX(LBOUND(d1,DIM=3),LBOUND(d2,DIM=3))
lmax=MIN(UBOUND(d1,DIM=3),UBOUND(d2,DIM=3))
! do some calculation:
cc_zaehler=0.
cc_nenner=0.
DO l=lmin,lmax
DO k=kmin,kmax
DO h=hmin,hmax
IF (d1(h,k,l)%ampli<0.OR.d2(h,k,l)%ampli<0.) CYCLE
delphi=ABS( d1(h,k,l)%phase - d2(h,k,l)%phase )
IF (delphi>180.) delphi=360.-delphi
IF (ABS(delphi)>180.) STOP 'assert error delphi'
cc_zaehler = cc_zaehler + d1(h,k,l)%ampli**2 * COS(delphi*deg2rad)
cc_nenner = cc_nenner + d1(h,k,l)%ampli**2
END DO
END DO
END DO
IF (cc_nenner>0.) cc_zaehler = cc_zaehler/cc_nenner
PRINT*,'cc=',cc_zaehler
END PROGRAM main
LCF
The CCP4 LCF (labelled column format) files were phased-out in the early 90's but some interesting data still lingers on half-inch tapes so, if they can be read, the LCF files can be converted to ASCII or other formats for safe-keeping, or further use. Jonathan Cooper has worked out how to read them, mainly using Python, and documents this.