FIX protocol messages latency calculation using awk

#  awk based script to calculate the latency of FIX protocoll messages.
#  usage: awk -f $someroot/ackdelta.awk [VERBOSE=1] [STAT=1] $someroot/FIX.log 

# Author : Sakthivel Kathirvel
# last.modified=Thursday, Mar 28, 2007

#This program is free software; you can redistribute it and/or
#modify it under the terms of the GNU General Public License
#as published by the Free Software Foundation; either version 2
#of the License, or (at your option) any later version.

#This program is distributed in the hope that it will be useful,
#but WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#GNU General Public License for more details.

#You should have received a copy of the GNU General Public License
#along with this program; if not, write to the Free Software
#Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.#  


# sample output => line format => ,, ,   
#   D,AAA 0002/02222007,50,10:40:13.329,10:40:13.379
#   D,AAA 0003/02222007,47,10:51:22.845,10:51:22.892
#   D,AAA 0004/02222007,4,10:58:46.246,10:58:46.250
#   D,AAA 0005/02222007,5,11:00:31.500,11:00:31.505
#   D,AAA 0006/02222007,4,11:01:35.909,11:01:35.913

#  sample output with VERBOSE ON
#    timeline=>_Feb_22_10:44:38.431068::mesgtype=>35=D::clordid=>11=AAA 0001/02222007
#    timeline=>_Feb_22_10:44:38.452347::mesgtype=>35=8::clordid=>11=AAA 0001/02222007::exetype=>150=0::ordstatus=>39=0
#    

#  sample output with STAT ON
############### Stats ###############
#   Min delay=33 ms.
#   Max delay=211 ms.
#   Aver. delay=79 ms. of 1216 acks
#   No of ClOrdId processed=1216
#   Process Started...Thu Feb 28 08:50:17 CST 2007
#   Process Ended ...Thu Feb 28 08:50:23 CST 2007
#   Processing took 6 Secs

# Preconditions and assumptions
#   assump.1=the trade log timestamp format = Fri Feb 22 10:50:07.604341
#   assump.2= trade log format has the datestamp appears exactly 2 lines above trademesg lines containing order[35=D] and ack[35=8].
#   assump.3= trademesg lines containing FIX tags uses 'SPACE' or '^A' as line delimiter
#   assump.4=ClOrdId tag value contains space. this space should be accounted for while reading 'full' clordid value while tokenzing the lines based on space as delimiter.
#   assump.5= in tradelog files, trademesg lines containing FIX tags may appear in different order
#   assump.6= the execution report 35=8 can have ExecType 150=0 or 150=A

# last.modified=Thursday, Mar 28, 2007

# impls details
# to speed up processing associative array is used. ClordId is used as index.
# lineBuffer is flushed frequently to optimize the memory usage while processing Laaarge log flies, only last 4 lines read are kept in memory. .
# the input line is normalized to use ' ' as delimiter. ^A characters are normalized to space
# dup clordid with 35=D are scrubbed out.
# clordid with 35=D that dont have match 35=8 with 150=0 or A are aggregaged and printed in stat; STAT switch need to be enabled.
# order and ack timestamp are scaled from micro to mill secs [ rounded off to 3 digits ]
BEGIN {
  linePtr=1;
  clordidCtr=1; #unique clordid with msgtype 35=D
  timeformat = "%a %b %d %H:%M:%S %Z %Y";
  startTimeSec=systime();
  startTS=sprintf("Process Started...%s", strftime(timeformat,systime()));
}
{
  gsub(/ /, "|", $0); # handles if the line delimiter is ascii control character ^A (SOH)
  aline[linePtr++]=$0;
}

# catches (In)coming New Order[35=D] line and extracts ClOrdId fields
/.*35=D.*11=.*59=3.*/{
    mesgtype=getMsgType(aline[linePtr-1]);
    orderLineClOrdID=getClordId(aline[linePtr-1]);
    orderTimeLine=getTimeLine(aline[linePtr-1]);
    if (VERBOSE ~ /[0-9]+/) printf("orderTimeLine=>%s::mesgtype=>%s::clordid=>%s\n", orderTimeLine, mesgtype,orderLineClOrdID )
    # convert the seconds part in the orderTimeLine from microsec to millisec [ rounding to 3 digit precision ]
    inSecPart=substr(orderTimeLine, index(orderTimeLine, ".")+1)
    orderLineClOrdIDValue=substr(orderLineClOrdID, index(orderLineClOrdID, "=")+1);
    # check for duplicate ClOrdID with 35=D and take only unique ones.
    if (length(clordid[orderLineClOrdIDValue]) > 0 ) {
        dupclordid[orderLineClOrdIDValue]="";
    } else {
        clordid[orderLineClOrdIDValue]=sprintf("%s,%s%d",
                                                      substr(mesgtype,index(mesgtype,"=")+1), 
                                                      substr(orderTimeLine, 1, index(orderTimeLine, ".")), 
                                                      inSecPart);
    }
    #flushes out lineBuffer
    delete aline;linePtr=1;

}
# scans a line for execution report[35=8] that are [150=0] or [150=A]  new Orders for any of ClordId-with-[35=D] 
/.*35=8.*150=[0A].*59=3.*/ {
    ackLineClOrdID=getClordId(aline[linePtr-1]);
    ackLineClOrdIDValue=substr(ackLineClOrdID, index(ackLineClOrdID, "=")+1);
    if (length(clordid[ackLineClOrdIDValue]) > 0 ) {
        ackTimeLine=getTimeLine(aline[linePtr-1]);
        if (VERBOSE ~ /[0-9]+/) printf("ACKtimeline=>%s::mesgtype=>%s::clordid=>%s::exetype=>%s::ordstatus=>%s\n", ackTimeLine, getMsgType(aline[linePtr-1]),ackLineClOrdID, getExecType(aline[linePtr-1]),getOrderStatus(aline[linePtr-1]))
        split(clordid[ackLineClOrdIDValue], PARTS, ",");
        inTimeLineinMilSec=PARTS[2];
        # convert the seconds part in the ackTimeLine from microsec to millisecs [ rounding to 3 digit precision ]
        ackSecPart=substr(ackTimeLine, index(ackTimeLine, ".")+1);
        ackTimeLineinMilSec=sprintf("%s%d",substr(ackTimeLine, 1, index(ackTimeLine,".")), ackSecPart);
        clordid[ackLineClOrdIDValue]=sprintf("match, %s,%s,%d",
                                                    clordid[ackLineClOrdIDValue],
                                                    ackTimeLineinMilSec,
                                                    getDelta(inTimeLineinMilSec,ackTimeLineinMilSec));
                                                    
    }
    #flushes out lineBuffer
    delete aline;linePtr=1;
}

END {
  min=9999;max=-1;aver=0;
  for (assocIndex in clordid ) {
      split(clordid[assocIndex],PARTS,",");
      if ("match" ~ PARTS[1]) {
          clordidCtr++;
          deltaPart=PARTS[5];
          if (int(deltaPart) >= int(max)) {max=deltaPart; }
          if (int(deltaPart) < int(min)) {min=deltaPart;}
          aver=aver+deltaPart;
          sub(/ /,"",PARTS[2]);
          printf("%s,%s,%s,%s,%s\n", PARTS[2],assocIndex,deltaPart,PARTS[3],PARTS[4]); 
      } else {
         nomatch[assocIndex]="";
      }
  }
  
  
  if (STAT ~ /[0-9]+/) {
      print "############### Stats ###############"
      printf ("Min delay=%d ms.\n", min)
      printf ("Max delay=%d ms.\n", max)
      printf ("Aver. delay=%d ms. of %i acks\n", aver/(clordidCtr-1),(clordidCtr-1) )
      printf ("No of ClOrdId processed=%d\n",clordidCtr-1);
      print startTS 
      endTimeSec=systime();
      printf("Process Ended ...%s\n", strftime(timeformat,systime()));
      printf("Processing took %d Secs\n",  (endTimeSec-startTimeSec));
      
      for (assocIndex in nomatch) {
            print assocIndex;
      }

      for (assocIndex in dupclordid ) {
            print assocIndex;
      }
  }
}

function getTimeLine(aLine) {
    linefields = split(aLine, FIELDS, "|");
    return sprintf("%s",FIELDS[1]);
}

function getClordId(aLine) {
   linefields = split(aLine, FIELDS, "|");
   for(ctr=1; ctr<=linefields; ctr++) {
     if (FIELDS[ctr] ~ "11=") {
       return sprintf("%s %s",  FIELDS[ctr], FIELDS[ctr+1]);
     }
   }
}

function getMsgType(aLine) {
   linefields = split(aLine, FIELDS, "|");
   for(ctr=1; ctr<=linefields; ctr++) {
     if (FIELDS[ctr] ~ "35=") {
       return sprintf("%s",  FIELDS[ctr]);
     }
   }
}

function getOrderStatus(aLine) {
   linefields = split(aLine, FIELDS, "|");
   for(ctr=1; ctr<=linefields; ctr++) {
     if (FIELDS[ctr] ~ "39=") {
       return sprintf("%s",  FIELDS[ctr]);
     }
   }
}

function getExecType(aLine) {
   linefields = split(aLine, FIELDS, "|");
   for(ctr=1; ctr<=linefields; ctr++) {
     if (FIELDS[ctr] ~ "150=") {
       return sprintf("%s",  FIELDS[ctr]);
     }
   }
}

function getStats() {
# todo
#find min, max and average of the deltas
}

function getDelta(inTime,outTime) {

  split(inTime, inTimeParts, ".")
  split(outTime, outTimeParts, ".")
  split(inTimeParts[1],inTimeHMSParts, ":")
  split(outTimeParts[1],outTimeHMSParts, ":")

  inTimeMilsec=(inTimeHMSParts[1]*60*60*1000) + (inTimeHMSParts[2]*60*1000) + (inTimeHMSParts[3]*1000) + inTimeParts[2]
  outTimeMilSec=(outTimeHMSParts[1]*60*60*1000) + (outTimeHMSParts[2]*60*1000) + (outTimeHMSParts[3]*1000) + outTimeParts[2]
  return (outTimeMilSec-inTimeMilsec)
}

No comments: