hydrus/include/HydrusNetworking.py

692 lines
22 KiB
Python

import calendar
import collections
import datetime
import httplib
import HydrusConstants as HC
import HydrusData
import HydrusSerialisable
import socket
import ssl
import threading
import time
def ConvertBandwidthRuleToString( rule ):
( bandwidth_type, time_delta, max_allowed ) = rule
if max_allowed == 0:
return 'No requests currently permitted.'
if bandwidth_type == HC.BANDWIDTH_TYPE_DATA:
s = HydrusData.ConvertIntToBytes( max_allowed )
elif bandwidth_type == HC.BANDWIDTH_TYPE_REQUESTS:
s = HydrusData.ConvertIntToPrettyString( max_allowed )
if time_delta is None:
s += ' per month'
else:
s += ' per ' + HydrusData.ConvertTimeDeltaToPrettyString( time_delta )
return s
def GetLocalConnection( port, https = False ):
old_socket = httplib.socket.socket
httplib.socket.socket = socket._socketobject
try:
if https:
context = ssl.SSLContext( ssl.PROTOCOL_SSLv23 )
context.options |= ssl.OP_NO_SSLv2
context.options |= ssl.OP_NO_SSLv3
connection = httplib.HTTPSConnection( '127.0.0.1', port, timeout = 8, context = context )
else:
connection = httplib.HTTPConnection( '127.0.0.1', port, timeout = 8 )
connection.connect()
finally:
httplib.socket.socket = old_socket
return connection
class BandwidthRules( HydrusSerialisable.SerialisableBase ):
SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_BANDWIDTH_RULES
SERIALISABLE_VERSION = 1
def __init__( self ):
HydrusSerialisable.SerialisableBase.__init__( self )
self._lock = threading.Lock()
self._rules = set()
def _GetSerialisableInfo( self ):
return list( self._rules )
def _InitialiseFromSerialisableInfo( self, serialisable_info ):
# tuples converted to lists via json
self._rules = set( ( tuple( rule_list ) for rule_list in serialisable_info ) )
def AddRule( self, bandwidth_type, time_delta, max_allowed ):
with self._lock:
rule = ( bandwidth_type, time_delta, max_allowed )
self._rules.add( rule )
def CanContinueDownload( self, bandwidth_tracker, threshold = 15 ):
with self._lock:
for ( bandwidth_type, time_delta, max_allowed ) in self._rules:
# Do not stop ongoing just because starts are throttled
requests_rule = bandwidth_type == HC.BANDWIDTH_TYPE_REQUESTS
# Do not block an ongoing jpg download because the current month is 100.03% used
wait_is_too_long = time_delta is None or time_delta > threshold
ignore_rule = requests_rule or wait_is_too_long
if ignore_rule:
continue
if bandwidth_tracker.GetUsage( bandwidth_type, time_delta ) >= max_allowed:
return False
return True
def CanDoWork( self, bandwidth_tracker, expected_requests, expected_bytes, threshold = 30 ):
with self._lock:
for ( bandwidth_type, time_delta, max_allowed ) in self._rules:
# Do not prohibit a raft of work starting or continuing because one small rule is over at this current second
if time_delta is not None and time_delta <= threshold:
continue
# we don't want to do a tiny amount of work, we want to do a decent whack
if bandwidth_type == HC.BANDWIDTH_TYPE_REQUESTS:
max_allowed -= expected_requests
elif bandwidth_type == HC.BANDWIDTH_TYPE_DATA:
max_allowed -= expected_bytes
if bandwidth_tracker.GetUsage( bandwidth_type, time_delta ) >= max_allowed:
return False
return True
def CanStartRequest( self, bandwidth_tracker, threshold = 5 ):
with self._lock:
for ( bandwidth_type, time_delta, max_allowed ) in self._rules:
# Do not prohibit a new job from starting just because the current download speed is 210/200KB/s
ignore_rule = bandwidth_type == HC.BANDWIDTH_TYPE_DATA and time_delta is not None and time_delta <= threshold
if ignore_rule:
continue
if bandwidth_tracker.GetUsage( bandwidth_type, time_delta ) >= max_allowed:
return False
return True
def GetWaitingEstimate( self, bandwidth_tracker ):
with self._lock:
estimates = []
for ( bandwidth_type, time_delta, max_allowed ) in self._rules:
if bandwidth_tracker.GetUsage( bandwidth_type, time_delta ) >= max_allowed:
estimates.append( bandwidth_tracker.GetWaitingEstimate( bandwidth_type, time_delta, max_allowed ) )
if len( estimates ) == 0:
return 0
else:
return max( estimates )
def GetUsageStringsAndGaugeTuples( self, bandwidth_tracker, threshold = 600 ):
with self._lock:
rows = []
rules_sorted = list( self._rules )
def key( ( bandwidth_type, time_delta, max_allowed ) ):
return time_delta
rules_sorted.sort( key = key )
for ( bandwidth_type, time_delta, max_allowed ) in rules_sorted:
time_is_less_than_threshold = time_delta is not None and time_delta <= threshold
if time_is_less_than_threshold or max_allowed == 0:
continue
usage = bandwidth_tracker.GetUsage( bandwidth_type, time_delta )
s = 'used '
if bandwidth_type == HC.BANDWIDTH_TYPE_DATA:
s += HydrusData.ConvertValueRangeToBytes( usage, max_allowed )
elif bandwidth_type == HC.BANDWIDTH_TYPE_REQUESTS:
s += HydrusData.ConvertValueRangeToPrettyString( usage, max_allowed ) + ' requests'
if time_delta is None:
s += ' this month'
else:
s += ' in the past ' + HydrusData.ConvertTimeDeltaToPrettyString( time_delta )
rows.append( ( s, ( usage, max_allowed ) ) )
return rows
def GetRules( self ):
with self._lock:
return list( self._rules )
HydrusSerialisable.SERIALISABLE_TYPES_TO_OBJECT_TYPES[ HydrusSerialisable.SERIALISABLE_TYPE_BANDWIDTH_RULES ] = BandwidthRules
class BandwidthTracker( HydrusSerialisable.SerialisableBase ):
SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_BANDWIDTH_TRACKER
SERIALISABLE_VERSION = 1
# I want to track and query using smaller periods even when the total time delta is larger than the next step up to increase granularity
# for instance, querying minutes for 90 mins time delta is more smooth than watching a juddery sliding two hour window
MAX_SECONDS_TIME_DELTA = 240
MAX_MINUTES_TIME_DELTA = 180 * 60
MAX_HOURS_TIME_DELTA = 72 * 3600
MAX_DAYS_TIME_DELTA = 31 * 86400
CACHE_MAINTENANCE_TIME_DELTA = 120
def __init__( self ):
HydrusSerialisable.SerialisableBase.__init__( self )
self._lock = threading.Lock()
self._next_cache_maintenance_timestamp = HydrusData.GetNow() + self.CACHE_MAINTENANCE_TIME_DELTA
self._months_bytes = collections.Counter()
self._days_bytes = collections.Counter()
self._hours_bytes = collections.Counter()
self._minutes_bytes = collections.Counter()
self._seconds_bytes = collections.Counter()
self._months_requests = collections.Counter()
self._days_requests = collections.Counter()
self._hours_requests = collections.Counter()
self._minutes_requests = collections.Counter()
self._seconds_requests = collections.Counter()
def _GetSerialisableInfo( self ):
dicts_flat = []
for d in ( self._months_bytes, self._days_bytes, self._hours_bytes, self._minutes_bytes, self._seconds_bytes, self._months_requests, self._days_requests, self._hours_requests, self._minutes_requests, self._seconds_requests ):
dicts_flat.append( d.items() )
return dicts_flat
def _InitialiseFromSerialisableInfo( self, serialisable_info ):
counters = [ collections.Counter( dict( flat_dict ) ) for flat_dict in serialisable_info ]
self._months_bytes = counters[ 0 ]
self._days_bytes = counters[ 1 ]
self._hours_bytes = counters[ 2 ]
self._minutes_bytes = counters[ 3 ]
self._seconds_bytes = counters[ 4 ]
self._months_requests = counters[ 5 ]
self._days_requests = counters[ 6 ]
self._hours_requests = counters[ 7 ]
self._minutes_requests = counters[ 8 ]
self._seconds_requests = counters[ 9 ]
def _GetCurrentDateTime( self ):
return datetime.datetime.utcfromtimestamp( HydrusData.GetNow() )
def _GetWindowAndCounter( self, bandwidth_type, time_delta ):
if bandwidth_type == HC.BANDWIDTH_TYPE_DATA:
if time_delta < self.MAX_SECONDS_TIME_DELTA:
window = 0
counter = self._seconds_bytes
elif time_delta < self.MAX_MINUTES_TIME_DELTA:
window = 60
counter = self._minutes_bytes
elif time_delta < self.MAX_HOURS_TIME_DELTA:
window = 3600
counter = self._hours_bytes
else:
window = 86400
counter = self._days_bytes
elif bandwidth_type == HC.BANDWIDTH_TYPE_REQUESTS:
if time_delta < self.MAX_SECONDS_TIME_DELTA:
window = 0
counter = self._seconds_requests
elif time_delta < self.MAX_MINUTES_TIME_DELTA:
window = 60
counter = self._minutes_requests
elif time_delta < self.MAX_HOURS_TIME_DELTA:
window = 3600
counter = self._hours_requests
else:
window = 86400
counter = self._days_requests
return ( window, counter )
def _GetMonthTime( self, dt ):
( year, month ) = ( dt.year, dt.month )
month_dt = datetime.datetime( year, month, 1 )
month_time = calendar.timegm( month_dt.timetuple() )
return month_time
def _GetRawUsage( self, bandwidth_type, time_delta ):
if time_delta is None:
dt = self._GetCurrentDateTime()
month_time = self._GetMonthTime( dt )
if bandwidth_type == HC.BANDWIDTH_TYPE_DATA:
return self._months_bytes[ month_time ]
elif bandwidth_type == HC.BANDWIDTH_TYPE_REQUESTS:
return self._months_requests[ month_time ]
( window, counter ) = self._GetWindowAndCounter( bandwidth_type, time_delta )
# we need the 'window' because this tracks brackets from the first timestamp and we want to include if 'since' lands anywhere in the bracket
# e.g. if it is 1200 and we want the past 1,000, we also need the bracket starting at 0, which will include 200-999
time_delta += window
since = HydrusData.GetNow() - time_delta
return sum( ( value for ( key, value ) in counter.items() if key >= since ) )
def _GetTimes( self, dt ):
# collapse each time portion to the latest timestamp it covers
( year, month, day, hour, minute ) = ( dt.year, dt.month, dt.day, dt.hour, dt.minute )
month_dt = datetime.datetime( year, month, 1 )
day_dt = datetime.datetime( year, month, day )
hour_dt = datetime.datetime( year, month, day, hour )
minute_dt = datetime.datetime( year, month, day, hour, minute )
month_time = calendar.timegm( month_dt.timetuple() )
day_time = calendar.timegm( day_dt.timetuple() )
hour_time = calendar.timegm( hour_dt.timetuple() )
minute_time = calendar.timegm( minute_dt.timetuple() )
second_time = calendar.timegm( dt.timetuple() )
return ( month_time, day_time, hour_time, minute_time, second_time )
def _GetUsage( self, bandwidth_type, time_delta ):
if time_delta is not None and bandwidth_type == HC.BANDWIDTH_TYPE_DATA and time_delta <= 5:
usage = self._GetWeightedApproximateUsage( time_delta )
else:
usage = self._GetRawUsage( bandwidth_type, time_delta )
self._MaintainCache()
return usage
def _GetWeightedApproximateUsage( self, time_delta ):
SEARCH_DELTA = time_delta * 5
window = 0
counter = self._seconds_bytes
SEARCH_DELTA += window
now = HydrusData.GetNow()
since = now - SEARCH_DELTA
valid_keys = [ key for key in counter.keys() if key >= since ]
if len( valid_keys ) == 0:
return 0
# If we want the average speed over past five secs but nothing has happened in sec 4 and 5, we don't want to count them
# otherwise your 1MB/s counts as 200KB/s
earliest_timestamp = min( valid_keys )
SAMPLE_DELTA = max( now - earliest_timestamp, 1 )
total_bytes = sum( ( counter[ key ] for key in valid_keys ) )
time_delta_average = total_bytes / SAMPLE_DELTA
return time_delta_average
def _MaintainCache( self ):
if HydrusData.TimeHasPassed( self._next_cache_maintenance_timestamp ):
now = HydrusData.GetNow()
oldest_second = now - self.MAX_SECONDS_TIME_DELTA
oldest_minute = now - self.MAX_MINUTES_TIME_DELTA
oldest_hour = now - self.MAX_HOURS_TIME_DELTA
oldest_day = now - self.MAX_DAYS_TIME_DELTA
def clear_counter( counter, timestamp ):
bad_keys = [ key for key in counter.keys() if key < timestamp ]
for bad_key in bad_keys:
del counter[ bad_key ]
clear_counter( self._days_bytes, oldest_day )
clear_counter( self._days_requests, oldest_day )
clear_counter( self._hours_bytes, oldest_hour )
clear_counter( self._hours_requests, oldest_hour )
clear_counter( self._minutes_bytes, oldest_minute )
clear_counter( self._minutes_requests, oldest_minute )
clear_counter( self._seconds_bytes, oldest_second )
clear_counter( self._seconds_requests, oldest_second )
self._next_cache_maintenance_timestamp = HydrusData.GetNow() + self.CACHE_MAINTENANCE_TIME_DELTA
def GetCurrentMonthSummary( self ):
with self._lock:
num_bytes = self._GetUsage( HC.BANDWIDTH_TYPE_DATA, None )
num_requests = self._GetUsage( HC.BANDWIDTH_TYPE_REQUESTS, None )
return 'used ' + HydrusData.ConvertIntToBytes( num_bytes ) + ' in ' + HydrusData.ConvertIntToPrettyString( num_requests ) + ' requests this month'
def GetMonthlyDataUsage( self ):
with self._lock:
result = []
for ( month_time, usage ) in self._months_bytes.items():
month_dt = datetime.datetime.utcfromtimestamp( month_time )
( year, month ) = ( month_dt.year, month_dt.month )
date_str = str( year ) + '-' + str( month )
result.append( ( date_str, usage ) )
result.sort()
return result
def GetUsage( self, bandwidth_type, time_delta ):
with self._lock:
if time_delta == 0:
return 0
return self._GetUsage( bandwidth_type, time_delta )
def GetWaitingEstimate( self, bandwidth_type, time_delta, max_allowed ):
with self._lock:
if time_delta is None: # this is monthly
dt = self._GetCurrentDateTime()
( year, month ) = ( dt.year, dt.month )
next_month_dt = datetime.datetime( year, month + 1, 1 )
next_month_time = calendar.timegm( next_month_dt.timetuple() )
return next_month_time - HydrusData.GetNow()
else:
# we want the highest time_delta at which usage is >= than max_allowed
# time_delta subtract that amount is the time we have to wait for usage to be less than max_allowed
# e.g. if in the past 24 hours there was a bunch of usage 16 hours ago clogging it up, we'll have to wait ~8 hours
( window, counter ) = self._GetWindowAndCounter( bandwidth_type, time_delta )
time_and_values = counter.items()
time_and_values.sort( reverse = True )
now = HydrusData.GetNow()
usage = 0
for ( timestamp, value ) in time_and_values:
current_search_time_delta = now - timestamp
if current_search_time_delta > time_delta: # we are searching beyond our time delta. no need to wait
break
usage += value
if usage >= max_allowed:
return time_delta - current_search_time_delta
return 0
def ReportDataUsed( self, num_bytes ):
with self._lock:
dt = self._GetCurrentDateTime()
( month_time, day_time, hour_time, minute_time, second_time ) = self._GetTimes( dt )
self._months_bytes[ month_time ] += num_bytes
self._days_bytes[ day_time ] += num_bytes
self._hours_bytes[ hour_time ] += num_bytes
self._minutes_bytes[ minute_time ] += num_bytes
self._seconds_bytes[ second_time ] += num_bytes
self._MaintainCache()
def ReportRequestUsed( self ):
with self._lock:
dt = self._GetCurrentDateTime()
( month_time, day_time, hour_time, minute_time, second_time ) = self._GetTimes( dt )
self._months_requests[ month_time ] += 1
self._days_requests[ day_time ] += 1
self._hours_requests[ hour_time ] += 1
self._minutes_requests[ minute_time ] += 1
self._seconds_requests[ second_time ] += 1
self._MaintainCache()
HydrusSerialisable.SERIALISABLE_TYPES_TO_OBJECT_TYPES[ HydrusSerialisable.SERIALISABLE_TYPE_BANDWIDTH_TRACKER ] = BandwidthTracker