diff --git a/doc/sql/mysql.sql b/doc/sql/mysql.sql index 2e04ee9e..f9c19ad7 100644 --- a/doc/sql/mysql.sql +++ b/doc/sql/mysql.sql @@ -56,6 +56,7 @@ CREATE TABLE `downloads` ( `timestamp` datetime NOT NULL, `url` text NOT NULL, `outfile` text NOT NULL, + `shasum` varchar(64) default NULL, PRIMARY KEY (`id`), KEY `session` (`session`,`timestamp`) ) ; diff --git a/doc/sql/update8.sql b/doc/sql/update8.sql new file mode 100644 index 00000000..25f61dba --- /dev/null +++ b/doc/sql/update8.sql @@ -0,0 +1 @@ +ALTER TABLE `downloads` ADD `shasum` VARCHAR(64) DEFAULT NULL; diff --git a/kippo/commands/wget.py b/kippo/commands/wget.py index cc89d99d..87cd91b8 100644 --- a/kippo/commands/wget.py +++ b/kippo/commands/wget.py @@ -13,8 +13,9 @@ import urlparse import random import re import exceptions -import os.path +import os import getopt +import hashlib commands = {} @@ -94,13 +95,15 @@ class command_wget(HoneyPotCommand): if cfg.has_option('honeypot', 'download_limit_size'): self.limit_size = int(cfg.get('honeypot', 'download_limit_size')) + self.download_path = cfg.get('honeypot', 'download_path') + self.safeoutfile = '%s/%s_%s' % \ - (cfg.get('honeypot', 'download_path'), + (self.download_path, time.strftime('%Y%m%d%H%M%S'), re.sub('[^A-Za-z0-9]', '_', url)) self.deferred = self.download(url, outfile, self.safeoutfile) if self.deferred: - self.deferred.addCallback(self.success) + self.deferred.addCallback(self.success, outfile) self.deferred.addErrback(self.error, url) def download(self, url, fakeoutfile, outputfile, *args, **kwargs): @@ -138,7 +141,36 @@ class command_wget(HoneyPotCommand): self.writeln('^C') self.connection.transport.loseConnection() - def success(self, data): + def success(self, data, outfile): + if not os.path.isfile(self.safeoutfile): + log.msg("there's no file " + self.safeoutfile) + self.exit() + + shasum = hashlib.sha256(open(self.safeoutfile, 'rb').read()).hexdigest() + hash_path = '%s/%s' % (self.download_path, shasum) + + # if we have content already, delete temp file + if not os.path.exists(hash_path): + os.rename(self.safeoutfile, hash_path) + else: + os.remove(self.safeoutfile) + log.msg("Not storing duplicate content " + shasum) + + self.honeypot.logDispatch( format='Downloaded URL (%(url)s) with SHA-256 %(shasum)s to %(outfile)s', + eventid='KIPP0007', url=self.url, outfile=hash_path, shasum=shasum ) + + log.msg( format='Downloaded URL (%(url)s) with SHA-256 %(shasum)s to %(outfile)s', + eventid='KIPP0007', url=self.url, outfile=hash_path, shasum=shasum ) + + # link friendly name to hash + os.symlink( shasum, self.safeoutfile ) + + # FIXME: is this necessary? + self.safeoutfile = hash_path + + # update the honeyfs to point to downloaded file + f = self.fs.getfile(outfile) + f[A_REALFILE] = hash_path self.exit() def error(self, error, url): @@ -197,11 +229,6 @@ class HTTPProgressDownloader(client.HTTPDownloader): (self.wget.url,) ) self.fileName = os.path.devnull self.nomore = True - else: - log.msg( 'Saving URL (%s) to %s' % (self.wget.url, self.fileName) ) - # we're in a completely different class here. need to use logDispatch - self.wget.honeypot.logDispatch( format='Saving URL (%(url)s) to %(outfile)s', - eventid='KIPP0007', url=self.wget.url, outfile=self.fileName ) self.wget.writeln('Saving to: `%s' % self.fakeoutfile) self.wget.honeypot.terminal.nextLine() @@ -259,6 +286,8 @@ class HTTPProgressDownloader(client.HTTPDownloader): self.wget.fs.update_realfile( self.wget.fs.getfile(self.fakeoutfile), self.wget.safeoutfile) + + self.wget.fileName = self.fileName return client.HTTPDownloader.pageEnd(self) # vim: set sw=4 et: diff --git a/kippo/dblog/jsonlog.py b/kippo/dblog/jsonlog.py index a199cc8f..b2135e50 100644 --- a/kippo/dblog/jsonlog.py +++ b/kippo/dblog/jsonlog.py @@ -66,7 +66,7 @@ class DBLogger(dblog.DBLogger): self.write( session, logentry ) def handleFileDownload(self, session, args): - logentry = { 'message' : 'File download: [%s] -> %s' % (args['url'], args['outfile']), 'url' : args['url'] } + logentry = { 'message' : 'File download: [%s] -> %s' % (args['url'], args['outfile']), 'url' : args['url'], 'shasum' : args['shasum'] } self.write( session, logentry ) # vim: set sw=4 et: diff --git a/kippo/dblog/mysql.py b/kippo/dblog/mysql.py index 6232dea8..0e049a46 100644 --- a/kippo/dblog/mysql.py +++ b/kippo/dblog/mysql.py @@ -142,8 +142,8 @@ class DBLogger(dblog.DBLogger): def handleFileDownload(self, session, args): self.simpleQuery('INSERT INTO `downloads`' + \ - ' (`session`, `timestamp`, `url`, `outfile`)' + \ + ' (`session`, `timestamp`, `url`, `outfile`, `shasum`)' + \ ' VALUES (%s, FROM_UNIXTIME(%s), %s, %s)', - (session, self.nowUnix(), args['url'], args['outfile'])) + (session, self.nowUnix(), args['url'], args['outfile'], args['shasum'])) # vim: set sw=4 et: diff --git a/kippo/dblog/textlog.py b/kippo/dblog/textlog.py index 79c4edff..9f8169cf 100644 --- a/kippo/dblog/textlog.py +++ b/kippo/dblog/textlog.py @@ -50,7 +50,7 @@ class DBLogger(dblog.DBLogger): self.write(session, 'Client version: [%s]' % (args['version'],)) def handleFileDownload(self, session, args): - self.write(session, 'File download: [%s] -> %s' % \ - (args['url'], args['outfile'])) + self.write(session, 'File download: [%s] -> %s with SHA-256 %s' % \ + (args['url'], args['outfile'], args['shasum'])) # vim: set sw=4 et: