#!/usr/bin/env python
# ******************************************************
# Copyright 2006: Commonwealth of Australia.
#
# Developed by the Computer Network Vulnerability Team,
# Information Security Group.
# Department of Defence.
#
# Michael Cohen
#
# ******************************************************
# Version: FLAG $Version: 0.87-pre1 Date: Thu Jun 12 00:48:38 EST 2008$
# ******************************************************
#
# * This program is free software; you can redistribute it and/or
# * modify it under the terms of the GNU General Public License
# * as published by the Free Software Foundation; either version 2
# * of the License, or (at your option) any later version.
# *
# * This program is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with this program; if not, write to the Free Software
# * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# ******************************************************
""" A library for handling IE History index.dat files

Supports IE versions from IE 5 onwards.
"""
import format,sys
from format import *
from plugins.FileFormats.BasicFormats import *

class Header(SimpleStruct):
    """ The header structure of an index.dat file.

    Declares, in order: a 0x1c byte magic string, the file size, the
    offset of a hash section, seven words of unknown meaning and a block
    size.
    """
    fields = [
        [ 'Magic', STRING, {"length":0x1c} ],
        [ 'file_size', LONG],
        # presumably the offset of the first HASH section -- TODO confirm
        # against the code which consumes it (not visible in this copy).
        [ 'hash_offset', LONG],
        [ 'unknown', WORD_ARRAY,{'count':7}],
        [ 'blocksize', LONG],
        ]

## The default blocksize
# NOTE(review): no use of this constant survives in this copy of the file;
# it is presumably referenced by the code that is missing below.
blocksize=0x80

class Hash(LONG_ARRAY):
    """ A data structure representing the index list in the history file.

    The hash section holds the offsets to all the url blocks in the
    file. We collect all of these and return a list of all offsets in all
    hash sections.

    Note that a hash section may point at a further hash section (its
    offset is kept in next_hash_offset). All sections are traversed
    automatically, so callers do not need to worry about looking for more
    hash sections.
    """
    def read(self):
        """ Parse the hash section found at the start of self.buffer.

        Reads the 4 byte magic, the section length (at offset 4) and the
        offset of the next hash section (at offset 8, stored in
        self.next_hash_offset). Entries are scanned starting at offset 16.

        Raises IOError when the magic can not be read or is not 'HASH'.

        NOTE(review): the remainder of this method is missing from this
        copy of the file, so what it returns can not be documented here.
        """
        data = []
        # Any failure while reading the magic (bare except) is reported to
        # the caller as an IOError.
        try:
            magic=STRING(self.buffer,length=4)
        except:
            raise IOError("Cant read any more")

        # Check the magic for this section
        # NOTE(review): `data` is the empty list created above and has no
        # `offset` attribute, so building this message raises
        # AttributeError instead of the intended IOError. The position of
        # the buffer was probably meant -- confirm and fix.
        if magic!='HASH':
            raise IOError("Location %s is not a hash array - This file may be empty!!"%(data.offset))

        section_length = LONG(self.buffer[4:]).get_value()
        self.next_hash_offset = LONG(self.buffer[8:]).get_value()

        offset=16
        # NOTE(review): SOURCE TEXT IS MISSING HERE. Everything between
        # "while offset" and "0:" on the next line has been lost (it looks
        # like a span delimited by angle brackets was stripped). The lost
        # text covers the rest of Hash.read, the definition of URLEntry
        # (used in next() below) and the beginning of the class whose
        # methods follow -- presumably IEHistoryFile, the name instantiated
        # in the __main__ section. The statement below is therefore NOT
        # valid as it stands and must be restored from an intact copy; the
        # loop body which follows it appears to be the tail of that class'
        # constructor, chaining through the hash sections by way of
        # next_hash_offset and stopping at the first IOError.
        while offset 0:
            try:
                h=Hash(self.buffer[hash_offset:],1)
                self.hashes.extend(h.data)
                hash_offset = h.next_hash_offset
            except IOError:
                break

    def __iter__(self):
        """ Start a fresh pass over the collected hash offsets.

        The object is its own iterator; see next().
        """
        self.hash_iter=self.hashes.__iter__()
        return self

    def next(self):
        """ Return a description of the entry at the next hash offset.

        Returns a dict for 'URL ' entries and None for every other entry
        type. The dict holds the URLEntry itself under 'event', the entry
        fields copied below, the 'offset' of the entry and the
        'content_type' and 'data' items of its content.

        Offsets whose type string can not be read (IOError) are skipped.
        When the offsets are exhausted the StopIteration raised by the
        underlying list iterator is not caught here, which ends the
        iteration.
        """
        result={}

        ## Chase all offsets to only include those in the file
        while 1:
            try:
                offset=self.hash_iter.next()
                entry_type = STRING(self.buffer.set_offset(offset),length=4).__str__()
            except IOError:
                continue

            break

        if entry_type == 'URL ':
            entry=URLEntry(self.buffer[offset:])
            result['event']=entry
            for key in ('type','modified_time','accessed_time','url','filename',
                        'size','directory_index'):
                result[key]=entry[key]

            result['offset'] = offset
            c=entry['content'].get_value()
            # NOTE(review): this assignment is overwritten by c['data'] in
            # the loop directly below.
            result['data']=c
            for key in ('content_type','data'):
                result[key]=c[key]

            return result

        return None

if __name__ == "__main__":
    # Command line use: parse the file named by the first argument, print
    # every URL entry and report the elapsed time on stderr.
    # NOTE(review): the file is opened in text mode ('r'); index.dat is a
    # binary format so 'rb' is presumably what is wanted on platforms
    # which treat the two modes differently -- confirm.
    fd=open(sys.argv[1],'r')

    # sys is already imported at the top of the module; only time is new.
    import time,sys
    a=time.time()

    history=IEHistoryFile(fd)
    for event in history:
        # next() yields None for entries which are not 'URL ' records.
        if event:
            print "url is %s" % event['event'], event['event']['content'].get_value()

    sys.stderr.write("Completed in %s seconds\n" % (time.time()-a))