mirror of https://github.com/python/cpython.git
155 lines
4.1 KiB
Python
155 lines
4.1 KiB
Python
# A parallelized "find(1)" using the thread module.
|
|
|
|
# This demonstrates the use of a work queue and worker threads.
|
|
# It really does do more stats/sec when using multiple threads,
|
|
# although the improvement is only about 20-30 percent.
|
|
# (That was 8 years ago. In 2002, on Linux, I can't measure
|
|
# a speedup. :-( )
|
|
|
|
# I'm too lazy to write a command line parser for the full find(1)
|
|
# command line syntax, so the predicate it searches for is wired-in,
|
|
# see function selector() below. (It currently searches for files with
|
|
# world write permission.)
|
|
|
|
# Usage: parfind.py [-w nworkers] [directory] ...
|
|
# Default nworkers is 4
|
|
|
|
|
|
import sys
|
|
import getopt
|
|
import time
|
|
import os
|
|
from stat import *
|
|
import _thread as thread
|
|
|
|
|
|
# Work queue class. Usage:
|
|
# wq = WorkQ()
|
|
# wq.addwork(func, (arg1, arg2, ...)) # one or more calls
|
|
# wq.run(nworkers)
|
|
# The work is done when wq.run() completes.
|
|
# The function calls executed by the workers may add more work.
|
|
# Don't use keyboard interrupts!
|
|
|
|
class WorkQ:
|
|
|
|
# Invariants:
|
|
|
|
# - busy and work are only modified when mutex is locked
|
|
# - len(work) is the number of jobs ready to be taken
|
|
# - busy is the number of jobs being done
|
|
# - todo is locked iff there is no work and somebody is busy
|
|
|
|
def __init__(self):
|
|
self.mutex = thread.allocate()
|
|
self.todo = thread.allocate()
|
|
self.todo.acquire()
|
|
self.work = []
|
|
self.busy = 0
|
|
|
|
def addwork(self, func, args):
|
|
job = (func, args)
|
|
self.mutex.acquire()
|
|
self.work.append(job)
|
|
self.mutex.release()
|
|
if len(self.work) == 1:
|
|
self.todo.release()
|
|
|
|
def _getwork(self):
|
|
self.todo.acquire()
|
|
self.mutex.acquire()
|
|
if self.busy == 0 and len(self.work) == 0:
|
|
self.mutex.release()
|
|
self.todo.release()
|
|
return None
|
|
job = self.work[0]
|
|
del self.work[0]
|
|
self.busy = self.busy + 1
|
|
self.mutex.release()
|
|
if len(self.work) > 0:
|
|
self.todo.release()
|
|
return job
|
|
|
|
def _donework(self):
|
|
self.mutex.acquire()
|
|
self.busy = self.busy - 1
|
|
if self.busy == 0 and len(self.work) == 0:
|
|
self.todo.release()
|
|
self.mutex.release()
|
|
|
|
def _worker(self):
|
|
time.sleep(0.00001) # Let other threads run
|
|
while 1:
|
|
job = self._getwork()
|
|
if not job:
|
|
break
|
|
func, args = job
|
|
func(*args)
|
|
self._donework()
|
|
|
|
def run(self, nworkers):
|
|
if not self.work:
|
|
return # Nothing to do
|
|
for i in range(nworkers-1):
|
|
thread.start_new(self._worker, ())
|
|
self._worker()
|
|
self.todo.acquire()
|
|
|
|
|
|
# Main program
|
|
|
|
def main():
|
|
nworkers = 4
|
|
opts, args = getopt.getopt(sys.argv[1:], '-w:')
|
|
for opt, arg in opts:
|
|
if opt == '-w':
|
|
nworkers = int(arg)
|
|
if not args:
|
|
args = [os.curdir]
|
|
|
|
wq = WorkQ()
|
|
for dir in args:
|
|
wq.addwork(find, (dir, selector, wq))
|
|
|
|
t1 = time.time()
|
|
wq.run(nworkers)
|
|
t2 = time.time()
|
|
|
|
sys.stderr.write('Total time %r sec.\n' % (t2-t1))
|
|
|
|
|
|
# The predicate -- defines what files we look for.
|
|
# Feel free to change this to suit your purpose
|
|
|
|
def selector(dir, name, fullname, stat):
|
|
# Look for world writable files that are not symlinks
|
|
return (stat[ST_MODE] & 0o002) != 0 and not S_ISLNK(stat[ST_MODE])
|
|
|
|
|
|
# The find procedure -- calls wq.addwork() for subdirectories
|
|
|
|
def find(dir, pred, wq):
|
|
try:
|
|
names = os.listdir(dir)
|
|
except os.error as msg:
|
|
print(repr(dir), ':', msg)
|
|
return
|
|
for name in names:
|
|
if name not in (os.curdir, os.pardir):
|
|
fullname = os.path.join(dir, name)
|
|
try:
|
|
stat = os.lstat(fullname)
|
|
except os.error as msg:
|
|
print(repr(fullname), ':', msg)
|
|
continue
|
|
if pred(dir, name, fullname, stat):
|
|
print(fullname)
|
|
if S_ISDIR(stat[ST_MODE]):
|
|
if not os.path.ismount(fullname):
|
|
wq.addwork(find, (fullname, pred, wq))
|
|
|
|
|
|
# Call the main program
|
|
|
|
main()
|