Reflection files format
CCP4 reflection files are binary files that are usually read by routines from CCP4 libraries. However, sometimes one wants to read them with a different programming language or without linking to the libraries, and it may be fun to work out alternative ways.
MTZ[edit | edit source]
A standalone Fortran program readmtz.f90 to read two MTZ files (both with at least one amplitude and one phase column), and to calculate the correlation coefficient of their electron densities, is this:
! gfortran -C -fbacktrace readmtz.f90 -o readmtz MODULE readmtz_mod ! minimal MTZ file reader - Kay Diederichs 11/2018 . According to ! http://www.ccp4.ac.uk/html/mtzformat.html which is not completely correct ! e.g. the reflection data do not start at byte 21=20+1, but at byte 81=4*20+1 TYPE data REAL :: ampli=-1. REAL :: phase END TYPE data CONTAINS SUBROUTINE readmtz(file,nref,refdat,colnames) IMPLICIT NONE CHARACTER, INTENT(IN) :: file*80 INTEGER, INTENT(OUT) :: nref REAL, ALLOCATABLE :: refdat(:,:) CHARACTER*30, ALLOCATABLE :: colnames(:) ! local variables INTEGER :: i,ncol,nbatch CHARACTER :: string*80,colnam(200)*30 nref=-1 OPEN(1,FILE=file,ACCESS='STREAM',ACTION='READ') ! is this BIG_ENDIAN or LITTLE_ENDIAN? we know where first hkl triple is IF (ALLOCATED(refdat)) DEALLOCATE(refdat) IF (ALLOCATED(colnames)) DEALLOCATE(colnames) ALLOCATE(refdat(3,1)) READ(1,POS=81) refdat IF (NINT(refdat(1,1))/=refdat(1,1).OR.NINT(refdat(2,1))/=refdat(2,1).OR.NINT(refdat(3,1))/=refdat(3,1)) THEN CLOSE(1) OPEN(1,FILE=string,ACCESS='STREAM',ACTION='READ',CONVERT='BIG_ENDIAN') PRINT *,'this file seems to be BIG_ENDIAN - was it written on SGI?' END IF DEALLOCATE(refdat) ! where is the header? READ(1,POS=5) i ! PRINT*,'header location:',i READ(1,POS=4*i-3) ! position before header ! read the header records, and print important ones to the screen i=0 DO READ(1)string IF (string(1:4)=='END ') EXIT ! skip history information and batch orientations IF (string(1:8)=='COLUMN H') WRITE(*,'(a)') & '# column name type min max set' IF (string(1:4)=='CELL'.OR.string(1:6)=='SYMINF') WRITE(*,'(a80)') string IF (string(1:6)=='COLUMN') THEN i=i+1 WRITE(*,'(i2,1x,a80)') i,string IF (i>200) STOP 'more than 200 columns? www.ccp4.ac.uk/html/mtzformat.html says 200!' colnam(i)=string(8:37) END IF IF (string(1:4)=='NCOL') THEN READ(string(5:),*) ncol,nref,nbatch PRINT*,'ncol, nref, nbatch=',ncol,nref,nbatch END IF END DO IF (nref==-1) STOP 'could not find NCOL record in header' IF (nbatch/=0) STOP 'nbatch must be 0' IF (i/=ncol) STOP 'column names and NCOL do not match' ALLOCATE(refdat(ncol,nref),colnames(ncol)) colnames=colnam(:ncol) READ(1,POS=81) ! position of reflection data DO i=1,nref READ(1) refdat(:,i) END DO CLOSE(1) END SUBROUTINE readmtz SUBROUTINE copy_given_cols(file,col_ampli,col_phase,refdat,colnames,data3d) CHARACTER, INTENT(IN) :: file*80,col_phase*30,col_ampli*30 REAL, ALLOCATABLE :: refdat(:,:) CHARACTER*30, ALLOCATABLE, INTENT(IN) :: colnames(:) TYPE(data), ALLOCATABLE :: data3d(:,:,:) INTEGER :: i,j,nref,h,k,l,hmin,kmin,lmin,hmax,kmax,lmax,icol_ampli,icol_phase icol_ampli=0 DO i=1,SIZE(colnames) IF (colnames(i)==col_ampli) THEN IF (icol_ampli==0) THEN icol_ampli=i ELSE STOP 'the amplitude column occured more than once!' END IF END IF END DO icol_phase=0 DO i=1,SIZE(colnames) IF (colnames(i)==col_phase) THEN IF (icol_phase==0) THEN icol_phase=i ELSE STOP 'the phase column occured more than once!' END IF END IF END DO WRITE(*,*)'indices of ampli ',TRIM(col_ampli),' and phase ',TRIM(col_phase),' columns:',icol_ampli,icol_phase IF (icol_ampli==0) STOP 'could not find ampli column' IF (icol_phase==0) STOP 'could not find phase column' nref=SIZE(refdat,DIM=2) hmin=MINVAL(NINT(refdat(1,:))) hmax=MAXVAL(NINT(refdat(1,:))) kmin=MINVAL(NINT(refdat(2,:))) kmax=MAXVAL(NINT(refdat(2,:))) lmin=MINVAL(NINT(refdat(3,:))) lmax=MAXVAL(NINT(refdat(3,:))) ALLOCATE(data3d(hmin:hmax,kmin:kmax,lmin:lmax)) j=0 DO i=1,nref h=NINT(refdat(1,i)) k=NINT(refdat(2,i)) l=NINT(refdat(3,i)) IF (isnan(refdat(icol_phase,i)).OR.isnan(refdat(icol_ampli,i))) CYCLE j=j+1 IF (refdat(icol_ampli,i)<0.) THEN PRINT*,i,refdat(icol_ampli,i) STOP 'error: amplitude <0 ?' END IF data3d(h,k,l)%phase=refdat(icol_phase,i) data3d(h,k,l)%ampli=refdat(icol_ampli,i) END DO PRINT*,'number with ampli=NaN or phase =NaN',nref-j PRINT*,'' END SUBROUTINE copy_given_cols END MODULE readmtz_mod PROGRAM main ! read two files, and calculate the correlation coefficient based on given columns USE readmtz_mod IMPLICIT NONE REAL, PARAMETER :: deg2rad=ATAN(1.)/45. REAL, ALLOCATABLE :: refdat(:,:) CHARACTER :: file*80,arg*80,col_phase*30,col_ampli*30 CHARACTER*30, ALLOCATABLE :: colnames(:) TYPE(data), ALLOCATABLE :: d1(:,:,:),d2(:,:,:) INTEGER :: i,j,nref,h,k,l,hmin,kmin,lmin,hmax,kmax,lmax REAL :: cc_zaehler,cc_nenner,delphi IF (COMMAND_ARGUMENT_COUNT()==0) STOP 'usage: readmtz file1 ampli_nam phase_nam file2 ampli_nam phase_nam' ! read first MTZ file CALL GET_COMMAND_ARGUMENT(1,file) WRITE(*,*) TRIM(file) CALL readmtz(file,nref,refdat,colnames) CALL GET_COMMAND_ARGUMENT(2,col_ampli) ! expects column name of amplitude CALL GET_COMMAND_ARGUMENT(3,col_phase) ! expects column name of phase CALL copy_given_cols(file,col_ampli,col_phase,refdat,colnames,d1) ! read second MTZ file CALL GET_COMMAND_ARGUMENT(4,file) WRITE(*,*) TRIM(file) CALL readmtz(file,nref,refdat,colnames) CALL GET_COMMAND_ARGUMENT(5,col_ampli) ! expects column name of amplitude CALL GET_COMMAND_ARGUMENT(6,col_phase) ! expects column name of phase CALL copy_given_cols(file,col_ampli,col_phase,refdat,colnames,d2) ! common array bounds hmin=MAX(LBOUND(d1,DIM=1),LBOUND(d2,DIM=1)) hmax=MIN(UBOUND(d1,DIM=1),UBOUND(d2,DIM=1)) kmin=MAX(LBOUND(d1,DIM=2),LBOUND(d2,DIM=2)) kmax=MIN(UBOUND(d1,DIM=2),UBOUND(d2,DIM=2)) lmin=MAX(LBOUND(d1,DIM=3),LBOUND(d2,DIM=3)) lmax=MIN(UBOUND(d1,DIM=3),UBOUND(d2,DIM=3)) ! do some calculation: cc_zaehler=0. cc_nenner=0. DO l=lmin,lmax DO k=kmin,kmax DO h=hmin,hmax IF (d1(h,k,l)%ampli<0.OR.d2(h,k,l)%ampli<0.) CYCLE delphi=ABS( d1(h,k,l)%phase - d2(h,k,l)%phase ) IF (delphi>180.) delphi=360.-delphi IF (ABS(delphi)>180.) STOP 'assert error delphi' cc_zaehler = cc_zaehler + d1(h,k,l)%ampli**2 * COS(delphi*deg2rad) cc_nenner = cc_nenner + d1(h,k,l)%ampli**2 END DO END DO END DO IF (cc_nenner>0.) cc_zaehler = cc_zaehler/cc_nenner PRINT*,'cc=',cc_zaehler END PROGRAM main
LCF[edit | edit source]
The CCP4 LCF (labelled column format) files were phased-out in the early 90's but some interesting data still lingers on half-inch tapes so, if they can be read, the LCF files can be converted to ASCII or other formats for safe-keeping, or further use. Jonathan Cooper has worked out how to read them, mainly using Python, and documents this. The directory ftp://ftp.ccp4.ac.uk/lcfstuff/ has some Fortran routines.
A Fortran program (lcf_dump) to accomplish this is:
! reads cell, column labels, comment, and reflection data from LCF file - Kay Diederichs 7/2019 . ! Nota bene - symmetry i.e. space group seemingly not stored in LCF file ! reading VAX format is easy with the ifort compiler since it understands VAX REAL format: ! ifort lcf_dump.f90 -o lcf_dump IMPLICIT NONE ! LCF items INTEGER(2) :: header(8),ncol INTEGER(2), ALLOCATABLE :: refdat(:) REAL :: cell(6) CHARACTER :: separator*12 ! length may depend on machine that wrote LCF file ! program variables INTEGER :: i CHARACTER :: string*120 i=COMMAND_ARGUMENT_COUNT() IF (i/=1) STOP 'usage: lcf_dump <name.LCF>' ! read command line CALL GET_COMMAND_ARGUMENT(1,string) ! expects LCF filename on command line ! OPEN(CONVERT=...) options are documented at ! https://software.intel.com/en-us/fortran-compiler-developer-guide-and-reference-open-convert-specifier OPEN(1,FILE=string,ACCESS='STREAM',ACTION='READ',CONVERT='VAXG') WRITE(*,*) 'LCF file: ',TRIM(string) ! read header and cell READ(1) header,separator ! reads 8*2+12 bytes. ends after byte 28 WRITE(*,'(a,8(i0,1x))') ' header: ',header ! the following code determines ncol according to lcflib.f line 1534ff: IF (header(1) /= -32768) STOP 'error - first header item is not -32768' IF (header(5) == -12) THEN ncol=6 ELSE IF (header(6)==-14) THEN ncol=7 ELSE ncol=-header(7)/2 IF (ncol<0 .OR. ncol>100) STOP 'error - could not determine ncol' END IF WRITE(*,'(a,i0)') ' ncol: ',ncol ALLOCATE(refdat(ncol)) DO i=1,6 READ(1) cell(i),separator ! reads 6*(4+12) bytes. ends after byte 124 END DO WRITE(*,'(a,6f10.4)') ' cell:',cell ! read column labels and comment string='' DO i=1,(header(8)+3)/4 READ(1)string(1+(i-1)*4:i*4),separator END DO WRITE(*,'(a)') TRIM(string) ! read reflections READ(1) refdat(1:2) ! needed for positioning - what is their meaning? DO READ(1) refdat IF (refdat(1)==32767) EXIT WRITE(*,*) refdat END DO END