Reflection files format: Difference between revisions
(→LCF) |
(→LCF) |
||
Line 244: | Line 244: | ||
! read column labels and comment | ! read column labels and comment | ||
string='' | string='' | ||
DO i=1, | DO i=1,(offset-124)/16 | ||
READ(1)string(1+(i-1)*4:i*4),separator | READ(1)string(1+(i-1)*4:i*4),separator | ||
END DO | END DO |
Revision as of 20:48, 14 July 2019
CCP4 reflection files are binary files that are usually read by routines from CCP4 libraries. However, sometimes one wants to read them with a different programming language or without linking to the libraries, and it may be fun to work out alternative ways.
MTZ
A standalone Fortran program readmtz.f90 to read two MTZ files (both with at least one amplitude and one phase column), and to calculate the correlation coefficient of their electron densities, is this:
! gfortran -C -fbacktrace readmtz.f90 -o readmtz MODULE readmtz_mod ! minimal MTZ file reader - Kay Diederichs 11/2018 . According to ! http://www.ccp4.ac.uk/html/mtzformat.html which is not completely correct ! e.g. the reflection data do not start at byte 21=20+1, but at byte 81=4*20+1 TYPE data REAL :: ampli=-1. REAL :: phase END TYPE data CONTAINS SUBROUTINE readmtz(file,nref,refdat,colnames) IMPLICIT NONE CHARACTER, INTENT(IN) :: file*80 INTEGER, INTENT(OUT) :: nref REAL, ALLOCATABLE :: refdat(:,:) CHARACTER*30, ALLOCATABLE :: colnames(:) ! local variables INTEGER :: i,ncol,nbatch CHARACTER :: string*80,colnam(200)*30 nref=-1 OPEN(1,FILE=file,ACCESS='STREAM',ACTION='READ') ! is this BIG_ENDIAN or LITTLE_ENDIAN? we know where first hkl triple is IF (ALLOCATED(refdat)) DEALLOCATE(refdat) IF (ALLOCATED(colnames)) DEALLOCATE(colnames) ALLOCATE(refdat(3,1)) READ(1,POS=81) refdat IF (NINT(refdat(1,1))/=refdat(1,1).OR.NINT(refdat(2,1))/=refdat(2,1).OR.NINT(refdat(3,1))/=refdat(3,1)) THEN CLOSE(1) OPEN(1,FILE=string,ACCESS='STREAM',ACTION='READ',CONVERT='BIG_ENDIAN') PRINT *,'this file seems to be BIG_ENDIAN - was it written on SGI?' END IF DEALLOCATE(refdat) ! where is the header? READ(1,POS=5) i ! PRINT*,'header location:',i READ(1,POS=4*i-3) ! position before header ! read the header records, and print important ones to the screen i=0 DO READ(1)string IF (string(1:4)=='END ') EXIT ! skip history information and batch orientations IF (string(1:8)=='COLUMN H') WRITE(*,'(a)') & '# column name type min max set' IF (string(1:4)=='CELL'.OR.string(1:6)=='SYMINF') WRITE(*,'(a80)') string IF (string(1:6)=='COLUMN') THEN i=i+1 WRITE(*,'(i2,1x,a80)') i,string IF (i>200) STOP 'more than 200 columns? www.ccp4.ac.uk/html/mtzformat.html says 200!' colnam(i)=string(8:37) END IF IF (string(1:4)=='NCOL') THEN READ(string(5:),*) ncol,nref,nbatch PRINT*,'ncol, nref, nbatch=',ncol,nref,nbatch END IF END DO IF (nref==-1) STOP 'could not find NCOL record in header' IF (nbatch/=0) STOP 'nbatch must be 0' IF (i/=ncol) STOP 'column names and NCOL do not match' ALLOCATE(refdat(ncol,nref),colnames(ncol)) colnames=colnam(:ncol) READ(1,POS=81) ! position of reflection data DO i=1,nref READ(1) refdat(:,i) END DO CLOSE(1) END SUBROUTINE readmtz SUBROUTINE copy_given_cols(file,col_ampli,col_phase,refdat,colnames,data3d) CHARACTER, INTENT(IN) :: file*80,col_phase*30,col_ampli*30 REAL, ALLOCATABLE :: refdat(:,:) CHARACTER*30, ALLOCATABLE, INTENT(IN) :: colnames(:) TYPE(data), ALLOCATABLE :: data3d(:,:,:) INTEGER :: i,j,nref,h,k,l,hmin,kmin,lmin,hmax,kmax,lmax,icol_ampli,icol_phase icol_ampli=0 DO i=1,SIZE(colnames) IF (colnames(i)==col_ampli) THEN IF (icol_ampli==0) THEN icol_ampli=i ELSE STOP 'the amplitude column occured more than once!' END IF END IF END DO icol_phase=0 DO i=1,SIZE(colnames) IF (colnames(i)==col_phase) THEN IF (icol_phase==0) THEN icol_phase=i ELSE STOP 'the phase column occured more than once!' END IF END IF END DO WRITE(*,*)'indices of ampli ',TRIM(col_ampli),' and phase ',TRIM(col_phase),' columns:',icol_ampli,icol_phase IF (icol_ampli==0) STOP 'could not find ampli column' IF (icol_phase==0) STOP 'could not find phase column' nref=SIZE(refdat,DIM=2) hmin=MINVAL(NINT(refdat(1,:))) hmax=MAXVAL(NINT(refdat(1,:))) kmin=MINVAL(NINT(refdat(2,:))) kmax=MAXVAL(NINT(refdat(2,:))) lmin=MINVAL(NINT(refdat(3,:))) lmax=MAXVAL(NINT(refdat(3,:))) ALLOCATE(data3d(hmin:hmax,kmin:kmax,lmin:lmax)) j=0 DO i=1,nref h=NINT(refdat(1,i)) k=NINT(refdat(2,i)) l=NINT(refdat(3,i)) IF (isnan(refdat(icol_phase,i)).OR.isnan(refdat(icol_ampli,i))) CYCLE j=j+1 IF (refdat(icol_ampli,i)<0.) THEN PRINT*,i,refdat(icol_ampli,i) STOP 'error: amplitude <0 ?' END IF data3d(h,k,l)%phase=refdat(icol_phase,i) data3d(h,k,l)%ampli=refdat(icol_ampli,i) END DO PRINT*,'number with ampli=NaN or phase =NaN',nref-j PRINT*,'' END SUBROUTINE copy_given_cols END MODULE readmtz_mod PROGRAM main ! read two files, and calculate the correlation coefficient based on given columns USE readmtz_mod IMPLICIT NONE REAL, PARAMETER :: deg2rad=ATAN(1.)/45. REAL, ALLOCATABLE :: refdat(:,:) CHARACTER :: file*80,arg*80,col_phase*30,col_ampli*30 CHARACTER*30, ALLOCATABLE :: colnames(:) TYPE(data), ALLOCATABLE :: d1(:,:,:),d2(:,:,:) INTEGER :: i,j,nref,h,k,l,hmin,kmin,lmin,hmax,kmax,lmax REAL :: cc_zaehler,cc_nenner,delphi IF (COMMAND_ARGUMENT_COUNT()==0) STOP 'usage: readmtz file1 ampli_nam phase_nam file2 ampli_nam phase_nam' ! read first MTZ file CALL GET_COMMAND_ARGUMENT(1,file) WRITE(*,*) TRIM(file) CALL readmtz(file,nref,refdat,colnames) CALL GET_COMMAND_ARGUMENT(2,col_ampli) ! expects column name of amplitude CALL GET_COMMAND_ARGUMENT(3,col_phase) ! expects column name of phase CALL copy_given_cols(file,col_ampli,col_phase,refdat,colnames,d1) ! read second MTZ file CALL GET_COMMAND_ARGUMENT(4,file) WRITE(*,*) TRIM(file) CALL readmtz(file,nref,refdat,colnames) CALL GET_COMMAND_ARGUMENT(5,col_ampli) ! expects column name of amplitude CALL GET_COMMAND_ARGUMENT(6,col_phase) ! expects column name of phase CALL copy_given_cols(file,col_ampli,col_phase,refdat,colnames,d2) ! common array bounds hmin=MAX(LBOUND(d1,DIM=1),LBOUND(d2,DIM=1)) hmax=MIN(UBOUND(d1,DIM=1),UBOUND(d2,DIM=1)) kmin=MAX(LBOUND(d1,DIM=2),LBOUND(d2,DIM=2)) kmax=MIN(UBOUND(d1,DIM=2),UBOUND(d2,DIM=2)) lmin=MAX(LBOUND(d1,DIM=3),LBOUND(d2,DIM=3)) lmax=MIN(UBOUND(d1,DIM=3),UBOUND(d2,DIM=3)) ! do some calculation: cc_zaehler=0. cc_nenner=0. DO l=lmin,lmax DO k=kmin,kmax DO h=hmin,hmax IF (d1(h,k,l)%ampli<0.OR.d2(h,k,l)%ampli<0.) CYCLE delphi=ABS( d1(h,k,l)%phase - d2(h,k,l)%phase ) IF (delphi>180.) delphi=360.-delphi IF (ABS(delphi)>180.) STOP 'assert error delphi' cc_zaehler = cc_zaehler + d1(h,k,l)%ampli**2 * COS(delphi*deg2rad) cc_nenner = cc_nenner + d1(h,k,l)%ampli**2 END DO END DO END DO IF (cc_nenner>0.) cc_zaehler = cc_zaehler/cc_nenner PRINT*,'cc=',cc_zaehler END PROGRAM main
LCF
The CCP4 LCF (labelled column format) files were phased-out in the early 90's but some interesting data still lingers on half-inch tapes so, if they can be read, the LCF files can be converted to ASCII or other formats for safe-keeping, or further use. Jonathan Cooper has worked out how to read them, mainly using Python, and documents this.
A Fortran program (lcf_dump) to accomplish this is:
! reads cell from LCF file - Kay Diederichs 7/2019 . ! TODO - determine ncol, offset from data in file ! reading VAX format is easy with the ifort compiler since it understands VAX REAL format: ! ifort lcf_dump.f90 -o lcf_dump IMPLICIT NONE ! LCF items INTEGER(2) :: header(8) INTEGER(2), ALLOCATABLE :: refdat(:) REAL :: cell(6) CHARACTER :: separator*12 ! length may depend on machine that wrote LCF file ! program variables INTEGER :: i,ncol,offset CHARACTER :: string*120 i=COMMAND_ARGUMENT_COUNT() IF (i/=3) STOP 'usage: lcf_dump <name.LCF> ncol offset' ! read command line CALL GET_COMMAND_ARGUMENT(1,string) ! expects LCF filename on command line ! OPEN(CONVERT=...) options are documented at ! https://software.intel.com/en-us/fortran-compiler-developer-guide-and-reference-open-convert-specifier OPEN(1,FILE=string,ACCESS='STREAM',ACTION='READ',CONVERT='VAXG') WRITE(*,*) 'LCF file: ',TRIM(string) CALL GET_COMMAND_ARGUMENT(2,string) ! expects NCOL READ(string,*) ncol ALLOCATE(refdat(ncol)) CALL GET_COMMAND_ARGUMENT(3,string) ! expects NCOL READ(string,*) offset WRITE(*,*) 'ncol, offset:',ncol,offset ! read header and cell READ(1) header,separator ! reads 8*2+12 bytes. ends after byte 28 WRITE(*,'(a,8(i0,1x))') ' header: ',header DO i=1,6 READ(1) cell(i),separator ! reads 6*(4+12) bytes. ends after byte 124 END DO WRITE(*,'(a,6f10.4)') ' cell:',cell ! read column labels and comment string='' DO i=1,(offset-124)/16 READ(1)string(1+(i-1)*4:i*4),separator END DO WRITE(*,'(a)') TRIM(string) ! read reflections READ(1) refdat(1:2) ! needed for positioning - what is their meaning? DO READ(1) refdat IF (refdat(1)==32767) EXIT WRITE(*,*) refdat END DO END