Python Script to Detect Hidden Data

Article Posted: January 01, 2012

The following is a simple Python script to detect anomalies in video files that could indicate hidden data, as referenced in Chet Hosmer's article "The Future of Steganography."

#!/usr/bin/env python

# January 2012 Discovering the Hidden Column by C. Hosmer
# DFI Column Python Script Sample Method to Detect TCSTEGO Data Hiding
# Version .90
# Source is released for public use provided that you reference the source article

# IMPORTS

import struct # used for packing/unpacking binary data
import sys # command line arguments
import os.path # file handling

# constants
DEFAULTPOSITION = -1
MB_100 = 104857600
GB = 1073741824
MB = 1048576
KB = 1024
THRESHOLD = 48 #BYTES
START_POS = 12 # SKIPS OVER 4 byte container name stco, 4 byte version + flags, 4 byte number of elements (count)
STCO_STR = "stco\0\0\0\0"
STCO_LEN = 8
BIGINDIAN = ">I"
STEG_SUSPECTED = "Stego Suspected detected Orphan Chunk : %s"
STEG_NOT_SUSPECTED = "NO Stego Suspected No Orphans Found"

"""
FUNCTION: ffind
This function's purpose is to find a target byte sequence (such as the stco table tag) in a file without loading the whole file.
Loading the whole file may cause memory errors.
"""
def ffind(fh, target, offset=0):
    """Return the position of the first *target* at or after *offset*.

    The file is scanned in pieces instead of being loaded whole, so very
    large files do not exhaust memory.

    fh     -- seekable file object; fh.read() must return the same string
              type as target (byte strings for files opened in 'rb' mode)
    target -- sequence of bytes to look for
    offset -- absolute file position at which the search starts

    Returns the absolute file position of the match, or DEFAULTPOSITION
    when the target does not occur.  The file is left positioned at
    *offset* in both cases.
    """
    pos = DEFAULTPOSITION

    # Consecutive reads share len(target) - 1 bytes so that a match lying
    # across a read boundary is still seen in one piece.
    overlap = max(len(target) - 1, 0)

    # Each read must be larger than the overlap or the scan cannot advance.
    read_size = max(MB_100, 2 * len(target))

    # The scan position is tracked explicitly instead of being derived from
    # fh.tell() - len(target): that expression became negative (and seek()
    # raised) for files shorter than the target.
    start = offset

    while True:
        fh.seek(start)
        buff = fh.read(read_size)
        hit = buff.find(target)
        if hit >= 0:
            pos = start + hit
            break
        #endif

        # Nothing beyond the already-searched overlap came back: EOF
        if len(buff) <= overlap:
            break
        #endif

        start += len(buff) - overlap
    #endwhile
    fh.seek(offset)
    return pos
    #enddef

"""
FUNCTION: formatBytes
This function converts a byte count into a more appropriate unit, if needed.
"""
def formatBytes(bytes):
    """Render a byte count as text with two decimals in the largest fitting unit.

    bytes -- the size to format; anything float() accepts

    Values of at least one GB, MB or KB are scaled to that unit and
    suffixed with its name.  Smaller values are printed unscaled, followed
    by a single space and no unit name.
    """
    size = float(bytes)

    # Largest unit first: the first threshold the size reaches wins.
    for unit_size, unit_name in ((GB, 'GB'), (MB, 'MB'), (KB, 'KB')):
        if size >= unit_size:
            return '%.2f %s' % (size / unit_size, unit_name)

    return '%.2f ' % size

def _lowest_chunk_offset(fh):
    """Return the smallest chunk offset found in the stco tables of *fh*.

    Every occurrence of the stco tag is located with ffind().  For each
    one the 4-byte atom length stored in front of the tag and the 4-byte
    entry count are read, followed by the 32-bit big-endian chunk offsets.

    Returns None when no table with at least one readable entry exists.
    """
    # The file is read in binary mode, so search with a byte string even
    # when STCO_STR is a text string (Python 3).
    if isinstance(STCO_STR, bytes):
        tag = STCO_STR
    else:
        tag = STCO_STR.encode("ascii")

    # 4 byte atom length, then the START_POS bytes that precede the entries
    header_len = 4 + START_POS
    lowest = None
    stco_pos = 0

    while True:
        stco_pos = ffind(fh, tag, stco_pos + STCO_LEN)

        # quit, no further STCO struct found
        if stco_pos == DEFAULTPOSITION:
            break

        fh.seek(stco_pos - 4)
        header = fh.read(header_len)
        if len(header) < header_len:
            continue  # file ends inside the table header

        atom_len = struct.unpack(BIGINDIAN, header[0:4])[0]
        count = struct.unpack(BIGINDIAN, header[12:16])[0]

        # Never trust the entry count beyond what the declared atom length
        # (16 header bytes + 4 bytes per entry) or the file itself can hold;
        # a chance match of the tag inside media data would otherwise raise
        # struct.error or trigger an enormous read.
        entries = min(count, max(atom_len - 16, 0) // 4)
        raw = fh.read(entries * 4)
        entries = len(raw) // 4
        if entries == 0:
            continue

        offsets = struct.unpack(">%dI" % entries, raw[:entries * 4])
        table_lowest = min(offsets)
        if lowest is None or table_lowest < lowest:
            lowest = table_lowest
    #endwhile

    return lowest


if __name__ == '__main__':

    # check that they provided a file
    if len(sys.argv) < 2:
        print("Please provide a file to analyze...")
        sys.exit()
    # isfile() also rejects directories, which open() could not read
    if not os.path.isfile(sys.argv[1]):
        print("The file does not exists, please provide a valid path")
        sys.exit()

    lowest_offset = None

    # open QuickTime / ISO MPEG-4 ### mov,qt,mp4,m4v,m4a
    # the with-statement closes the file even when scanning raises
    with open(sys.argv[1], 'rb') as fileIn:

        mdat_pos = ffind(fileIn, b"mdat")

        print("")
        print("Scanning File ...")

        # search for Sample Table Chunk Offsets
        if mdat_pos != DEFAULTPOSITION:
            lowest_offset = _lowest_chunk_offset(fileIn)

    print("")

    if mdat_pos == DEFAULTPOSITION or lowest_offset is None:
        # Without both structures there is nothing to measure.  Previously
        # a file without any stco table was reported as suspicious and the
        # script then failed on an undefined offset.
        print("Unable to analyze: no mdat atom or stco chunk offset table found")
    else:
        # Number of bytes between the start of the mdat data (4 bytes after
        # the tag position) and the first byte any chunk refers to.  A chunk
        # that references the very beginning of mdat yields 0.
        orphan_size = lowest_offset - (mdat_pos + 4)

        if orphan_size <= THRESHOLD:
            print(STEG_NOT_SUSPECTED)
        else:
            print(STEG_SUSPECTED % (formatBytes(orphan_size)))
            # Report the offset the measurement is based on rather than
            # whichever table entry happened to be read last.
            print("Starting at MDAT Offset : %s" % hex(lowest_offset))
 

Related Topics: Computer Forensics Software Toolkit