Reflection files format
CCP4 reflection files are binary files that are usually read by routines from CCP4 libraries. However, sometimes one wants to read them with a different programming language or without linking to the libraries, and it may be fun to work out alternative ways.
MTZ
A standalone Fortran program readmtz.f90 to read two MTZ files (both with at least one amplitude and one phase column), and to calculate the correlation coefficient of their electron densities, is this:
! gfortran -C -fbacktrace readmtz.f90 -o readmtz
MODULE readmtz_mod
! minimal MTZ file reader - Kay Diederichs 11/2018 . According to
! http://www.ccp4.ac.uk/html/mtzformat.html which is not completely correct
! e.g. the reflection data do not start at byte 21=20+1, but at byte 81=4*20+1
TYPE data
REAL :: ampli=-1.
REAL :: phase
END TYPE data
CONTAINS
SUBROUTINE readmtz(file,nref,refdat,colnames)
IMPLICIT NONE
CHARACTER, INTENT(IN) :: file*80
INTEGER, INTENT(OUT) :: nref
REAL, ALLOCATABLE :: refdat(:,:)
CHARACTER*30, ALLOCATABLE :: colnames(:)
! local variables
INTEGER :: i,ncol,nbatch
CHARACTER :: string*80,colnam(200)*30
nref=-1
OPEN(1,FILE=file,ACCESS='STREAM',ACTION='READ')
! is this BIG_ENDIAN or LITTLE_ENDIAN? we know where first hkl triple is
IF (ALLOCATED(refdat)) DEALLOCATE(refdat)
IF (ALLOCATED(colnames)) DEALLOCATE(colnames)
ALLOCATE(refdat(3,1))
READ(1,POS=81) refdat
IF (NINT(refdat(1,1))/=refdat(1,1).OR.NINT(refdat(2,1))/=refdat(2,1).OR.NINT(refdat(3,1))/=refdat(3,1)) THEN
CLOSE(1)
OPEN(1,FILE=string,ACCESS='STREAM',ACTION='READ',CONVERT='BIG_ENDIAN')
PRINT *,'this file seems to be BIG_ENDIAN - was it written on SGI?'
END IF
DEALLOCATE(refdat)
! where is the header?
READ(1,POS=5) i ! PRINT*,'header location:',i
READ(1,POS=4*i-3) ! position before header
! read the header records, and print important ones to the screen
i=0
DO
READ(1)string
IF (string(1:4)=='END ') EXIT ! skip history information and batch orientations
IF (string(1:8)=='COLUMN H') WRITE(*,'(a)') &
'# column name type min max set'
IF (string(1:4)=='CELL'.OR.string(1:6)=='SYMINF') WRITE(*,'(a80)') string
IF (string(1:6)=='COLUMN') THEN
i=i+1
WRITE(*,'(i2,1x,a80)') i,string
IF (i>200) STOP 'more than 200 columns? www.ccp4.ac.uk/html/mtzformat.html says 200!'
colnam(i)=string(8:37)
END IF
IF (string(1:4)=='NCOL') THEN
READ(string(5:),*) ncol,nref,nbatch
PRINT*,'ncol, nref, nbatch=',ncol,nref,nbatch
END IF
END DO
IF (nref==-1) STOP 'could not find NCOL record in header'
IF (nbatch/=0) STOP 'nbatch must be 0'
IF (i/=ncol) STOP 'column names and NCOL do not match'
ALLOCATE(refdat(ncol,nref),colnames(ncol))
colnames=colnam(:ncol)
READ(1,POS=81) ! position of reflection data
DO i=1,nref
READ(1) refdat(:,i)
END DO
CLOSE(1)
END SUBROUTINE readmtz
SUBROUTINE copy_given_cols(file,col_ampli,col_phase,refdat,colnames,data3d)
CHARACTER, INTENT(IN) :: file*80,col_phase*30,col_ampli*30
REAL, ALLOCATABLE :: refdat(:,:)
CHARACTER*30, ALLOCATABLE, INTENT(IN) :: colnames(:)
TYPE(data), ALLOCATABLE :: data3d(:,:,:)
INTEGER :: i,j,nref,h,k,l,hmin,kmin,lmin,hmax,kmax,lmax,icol_ampli,icol_phase
icol_ampli=0
DO i=1,SIZE(colnames)
IF (colnames(i)==col_ampli) THEN
IF (icol_ampli==0) THEN
icol_ampli=i
ELSE
STOP 'the amplitude column occured more than once!'
END IF
END IF
END DO
icol_phase=0
DO i=1,SIZE(colnames)
IF (colnames(i)==col_phase) THEN
IF (icol_phase==0) THEN
icol_phase=i
ELSE
STOP 'the phase column occured more than once!'
END IF
END IF
END DO
WRITE(*,*)'indices of ampli ',TRIM(col_ampli),' and phase ',TRIM(col_phase),' columns:',icol_ampli,icol_phase
IF (icol_ampli==0) STOP 'could not find ampli column'
IF (icol_phase==0) STOP 'could not find phase column'
nref=SIZE(refdat,DIM=2)
hmin=MINVAL(NINT(refdat(1,:)))
hmax=MAXVAL(NINT(refdat(1,:)))
kmin=MINVAL(NINT(refdat(2,:)))
kmax=MAXVAL(NINT(refdat(2,:)))
lmin=MINVAL(NINT(refdat(3,:)))
lmax=MAXVAL(NINT(refdat(3,:)))
ALLOCATE(data3d(hmin:hmax,kmin:kmax,lmin:lmax))
j=0
DO i=1,nref
h=NINT(refdat(1,i))
k=NINT(refdat(2,i))
l=NINT(refdat(3,i))
IF (isnan(refdat(icol_phase,i)).OR.isnan(refdat(icol_ampli,i))) CYCLE
j=j+1
IF (refdat(icol_ampli,i)<0.) THEN
PRINT*,i,refdat(icol_ampli,i)
STOP 'error: amplitude <0 ?'
END IF
data3d(h,k,l)%phase=refdat(icol_phase,i)
data3d(h,k,l)%ampli=refdat(icol_ampli,i)
END DO
PRINT*,'number with ampli=NaN or phase =NaN',nref-j
PRINT*,''
END SUBROUTINE copy_given_cols
END MODULE readmtz_mod
PROGRAM main
! read two files, and calculate the correlation coefficient based on given columns
USE readmtz_mod
IMPLICIT NONE
REAL, PARAMETER :: deg2rad=ATAN(1.)/45.
REAL, ALLOCATABLE :: refdat(:,:)
CHARACTER :: file*80,arg*80,col_phase*30,col_ampli*30
CHARACTER*30, ALLOCATABLE :: colnames(:)
TYPE(data), ALLOCATABLE :: d1(:,:,:),d2(:,:,:)
INTEGER :: i,j,nref,h,k,l,hmin,kmin,lmin,hmax,kmax,lmax
REAL :: cc_zaehler,cc_nenner,delphi
IF (COMMAND_ARGUMENT_COUNT()==0) STOP 'usage: readmtz file1 ampli_nam phase_nam file2 ampli_nam phase_nam'
! read first MTZ file
CALL GET_COMMAND_ARGUMENT(1,file)
WRITE(*,*) TRIM(file)
CALL readmtz(file,nref,refdat,colnames)
CALL GET_COMMAND_ARGUMENT(2,col_ampli) ! expects column name of amplitude
CALL GET_COMMAND_ARGUMENT(3,col_phase) ! expects column name of phase
CALL copy_given_cols(file,col_ampli,col_phase,refdat,colnames,d1)
! read second MTZ file
CALL GET_COMMAND_ARGUMENT(4,file)
WRITE(*,*) TRIM(file)
CALL readmtz(file,nref,refdat,colnames)
CALL GET_COMMAND_ARGUMENT(5,col_ampli) ! expects column name of amplitude
CALL GET_COMMAND_ARGUMENT(6,col_phase) ! expects column name of phase
CALL copy_given_cols(file,col_ampli,col_phase,refdat,colnames,d2)
! common array bounds
hmin=MAX(LBOUND(d1,DIM=1),LBOUND(d2,DIM=1))
hmax=MIN(UBOUND(d1,DIM=1),UBOUND(d2,DIM=1))
kmin=MAX(LBOUND(d1,DIM=2),LBOUND(d2,DIM=2))
kmax=MIN(UBOUND(d1,DIM=2),UBOUND(d2,DIM=2))
lmin=MAX(LBOUND(d1,DIM=3),LBOUND(d2,DIM=3))
lmax=MIN(UBOUND(d1,DIM=3),UBOUND(d2,DIM=3))
! do some calculation:
cc_zaehler=0.
cc_nenner=0.
DO l=lmin,lmax
DO k=kmin,kmax
DO h=hmin,hmax
IF (d1(h,k,l)%ampli<0.OR.d2(h,k,l)%ampli<0.) CYCLE
delphi=ABS( d1(h,k,l)%phase - d2(h,k,l)%phase )
IF (delphi>180.) delphi=360.-delphi
IF (ABS(delphi)>180.) STOP 'assert error delphi'
cc_zaehler = cc_zaehler + d1(h,k,l)%ampli**2 * COS(delphi*deg2rad)
cc_nenner = cc_nenner + d1(h,k,l)%ampli**2
END DO
END DO
END DO
IF (cc_nenner>0.) cc_zaehler = cc_zaehler/cc_nenner
PRINT*,'cc=',cc_zaehler
END PROGRAM main
LCF
The CCP4 LCF (labelled column format) files were phased-out in the early 90's but some interesting data still lingers on half-inch tapes so, if they can be read, the LCF files can be converted to ASCII or other formats for safe-keeping, or further use. Jonathan Cooper has worked out how to read them, mainly using Python, and documents this. The directory ftp://ftp.ccp4.ac.uk/lcfstuff/ has some Fortran routines.
A Fortran program (lcf_dump) to accomplish this is:
! reads cell, column labels, comment, and reflection data from LCF file - Kay Diederichs 7/2019 . ! Nota bene - symmetry i.e. space group seemingly not stored in LCF file ! reading VAX format is easy with the ifort compiler since it understands VAX REAL format: ! ifort lcf_dump.f90 -o lcf_dump IMPLICIT NONE ! LCF items INTEGER(2) :: header(8),ncol INTEGER(2), ALLOCATABLE :: refdat(:) REAL :: cell(6) CHARACTER :: separator*12 ! length may depend on machine that wrote LCF file ! program variables INTEGER :: i CHARACTER :: string*120 i=COMMAND_ARGUMENT_COUNT() IF (i/=1) STOP 'usage: lcf_dump <name.LCF>' ! read command line CALL GET_COMMAND_ARGUMENT(1,string) ! expects LCF filename on command line ! OPEN(CONVERT=...) options are documented at ! https://software.intel.com/en-us/fortran-compiler-developer-guide-and-reference-open-convert-specifier OPEN(1,FILE=string,ACCESS='STREAM',ACTION='READ',CONVERT='VAXG') WRITE(*,*) 'LCF file: ',TRIM(string) ! read header and cell READ(1) header,separator ! reads 8*2+12 bytes. ends after byte 28 WRITE(*,'(a,8(i0,1x))') ' header: ',header ! the following code determines ncol according to lcflib.f line 1534ff: IF (header(1) /= -32768) STOP 'error - first header item is not -32768' IF (header(5) == -12) THEN ncol=6 ELSE IF (header(6)==-14) THEN ncol=7 ELSE ncol=-header(7)/2 IF (ncol<0 .OR. ncol>100) STOP 'error - could not determine ncol' END IF WRITE(*,'(a,i0)') ' ncol: ',ncol ALLOCATE(refdat(ncol)) DO i=1,6 READ(1) cell(i),separator ! reads 6*(4+12) bytes. ends after byte 124 END DO WRITE(*,'(a,6f10.4)') ' cell:',cell ! read column labels and comment string='' DO i=1,(header(8)+3)/4 READ(1)string(1+(i-1)*4:i*4),separator END DO WRITE(*,'(a)') TRIM(string) ! read reflections READ(1) refdat(1:2) ! needed for positioning - what is their meaning? DO READ(1) refdat IF (refdat(1)==32767) EXIT WRITE(*,*) refdat END DO END