diff options
author | Treehugger Robot <treehugger-gerrit@google.com> | 2018-04-09 23:55:17 +0000 |
---|---|---|
committer | Gerrit Code Review <noreply-gerritcodereview@google.com> | 2018-04-09 23:55:17 +0000 |
commit | 2d726901ab174592cff7c8a681fa2d017991a969 (patch) | |
tree | 878d54254b21882cf2068ad2b658e23efe6e2417 | |
parent | 5eb57e19b83b467784d0bcae4f7ff1fe14ce4a7b (diff) | |
parent | b84d5fd0c6e67af069fe542149b0fc7d72844e50 (diff) | |
download | extras-2d726901ab174592cff7c8a681fa2d017991a969.tar.gz |
Merge changes I8910fa82,I9bda1717,I3d9c1eb0
* changes:
Perfprofd: Separate symbol offset in samples
Perfprofd: Add parallel processing to stack script
Perfprofd: Update perf_proto_stack
-rw-r--r-- | perfprofd/scripts/perf_proto_stack.py | 603 | ||||
-rw-r--r-- | perfprofd/scripts/sorted_collection.py | 146 |
2 files changed, 645 insertions, 104 deletions
diff --git a/perfprofd/scripts/perf_proto_stack.py b/perfprofd/scripts/perf_proto_stack.py index 03693589..1fdd938b 100644 --- a/perfprofd/scripts/perf_proto_stack.py +++ b/perfprofd/scripts/perf_proto_stack.py @@ -17,121 +17,516 @@ # Super simplistic printer of a perfprofd output proto. Illustrates # how to parse and traverse a perfprofd output proto in Python. +# This relies on libunwindstack's unwind_symbol. Build with +# mmma system/core/libunwindstack + +import argparse +import itertools +import json + +import logging +logging.basicConfig(format = "%(message)s") + +from multiprocessing.dummy import Pool as ThreadPool +import os.path +from sorted_collection import SortedCollection +import subprocess +from threading import Timer + + # Generate with: -# aprotoc -I=system/extras/perfprofd --python_out=system/extras/perfprofd/scripts \ -# system/extras/perfprofd/perf_profile.proto -import perf_profile_pb2 +# aprotoc -I=external/perf_data_converter/src/quipper \ +# --python_out=system/extras/perfprofd/scripts \ +# external/perf_data_converter/src/quipper/perf_data.proto +# aprotoc -I=external/perf_data_converter/src/quipper -I=system/extras/perfprofd \ +# --python_out=system/extras/perfprofd/scripts \ +# system/extras/perfprofd/perfprofd_record.proto +import perfprofd_record_pb2 # Make sure that symbol is on the PYTHONPATH, e.g., run as # PYTHONPATH=$PYTHONPATH:$ANDROID_BUILD_TOP/development/scripts python ... import symbol +from symbol import SymbolInformation # This is wrong. But then the symbol module is a bad quagmire. +# TODO: Check build IDs. symbol.SetAbi(["ABI: 'arm64'"]) -print "Reading symbols from", symbol.SYMBOLS_DIR - -# TODO: accept argument for parsing. -file = open('perf.data.encoded.0', 'rb') -data = file.read() - -profile = perf_profile_pb2.AndroidPerfProfile() -profile.ParseFromString(data) - -print "Total samples: ", profile.total_samples - -module_list = profile.load_modules - -counters = {} - -def indent(txt, stops = 1): - return '\n'.join(' ' * stops + line for line in txt.splitlines()) - - -def print_samples(module_list, programs, process_names, counters): - print 'Samples:' - for program in programs: - process_name = '?' - if program.HasField('process_name_id'): - process_name = process_names[program.process_name_id] - print indent('%s (%s)' % (program.name, process_name), 1) - for module in program.modules: - if module.HasField('load_module_id'): - module_descr = module_list[module.load_module_id] - print indent(module_descr.name, 2) - has_build_id = module_descr.HasField('build_id') - if has_build_id: - print indent('Build ID: %s' % (module_descr.build_id), 3) - for addr in module.address_samples: - # TODO: Stacks vs single samples. - addr_rel = addr.address[0] - addr_rel_hex = "%x" % addr_rel - print indent('%d %s' % (addr.count, addr_rel_hex), 3) - if module_descr.name != '[kernel.kallsyms]': - if has_build_id: - info = symbol.SymbolInformation(module_descr.name, addr_rel_hex) - # As-is, only info[0] (inner-most inlined function) is recognized. - (source_symbol, source_location, object_symbol_with_offset) = info[0] - if object_symbol_with_offset is not None: - print indent(object_symbol_with_offset, 4) - if source_symbol is not None: - for (sym_inlined, loc_inlined, _) in info: - # TODO: Figure out what's going on here: - if sym_inlined is not None: - print indent(sym_inlined, 5) - else: - print indent('???', 5) - if loc_inlined is not None: - print ' %s' % (indent(loc_inlined, 5)) - elif module_descr.symbol and (addr_rel & 0x8000000000000000 != 0): - index = 0xffffffffffffffff - addr_rel - source_symbol = module_descr.symbol[index] - print indent(source_symbol, 4) - counters_key = None - if source_symbol is not None: - counters_key = (module_descr.name, source_symbol) - else: - counters_key = (module_descr.name, addr_rel_hex) - if counters_key in counters: - counters[counters_key] = counters[counters_key] + addr.count - else: - counters[counters_key] = addr.count + +class MmapState(object): + def __init__(self): + self._list = SortedCollection((), lambda x : x[0]) + + def add_map(self, start, length, pgoff, name): + tuple = (start, length, pgoff, name) + self._list.insert(tuple) + + def find(self, addr): + try: + tuple = self._list.find_le(addr) + if addr < tuple[0] + tuple[1]: + return tuple + return None + except ValueError: + return None + + def copy(self): + ret = MmapState() + ret._list = self._list.copy() + return ret + + def __str__(self): + return "MmapState: " + self._list.__str__() + def __repr__(self): + return self.__str__() + +class SymbolMap(object): + def __init__(self, min_v): + self._list = SortedCollection((), lambda x : x[0]) + self._min_vaddr = min_v + + def add_symbol(self, start, length, name): + tuple = (start, length, name) + self._list.insert(tuple) + + def find(self, addr): + try: + tuple = self._list.find_le(addr) + if addr < tuple[0] + tuple[1]: + return tuple[2] + return None + except ValueError: + return None + + def copy(self): + ret = SymbolMap() + ret._list = self._list.copy() + return ret + + def __str__(self): + return "SymbolMap: " + self._list.__str__() + def __repr__(self): + return self.__str__() + +def intern_uni(u): + return intern(u.encode('ascii', 'replace')) + +def collect_tid_names(perf_data): + tid_name_map = {} + for event in perf_data.events: + if event.HasField('comm_event'): + tid_name_map[event.comm_event.tid] = intern_uni(event.comm_event.comm) + return tid_name_map + +def create_symbol_maps(profile): + symbol_maps = {} + for si in profile.symbol_info: + map = SymbolMap(si.min_vaddr) + symbol_maps[si.filename] = map + for sym in si.symbols: + map.add_symbol(sym.addr, sym.size, intern_uni(sym.name)) + return symbol_maps + +def update_mmap_states(event, state_map): + if event.HasField('mmap_event'): + mmap_event = event.mmap_event + # Skip kernel stuff. + if mmap_event.tid == 0: + return + # Create new map, if necessary. + if not mmap_event.pid in state_map: + state_map[mmap_event.pid] = MmapState() + state_map[mmap_event.pid].add_map(mmap_event.start, mmap_event.len, mmap_event.pgoff, + intern_uni(mmap_event.filename)) + elif event.HasField('fork_event'): + fork_event = event.fork_event + # Skip threads + if fork_event.pid == fork_event.ppid: + return + if fork_event.ppid not in state_map: + logging.warn("fork from %d without map", fork_event.ppid) + return + state_map[fork_event.pid] = state_map[fork_event.ppid].copy() + +skip_dso = set() +vaddr = {} + +def find_vaddr(vaddr_map, filename): + if filename in vaddr_map: + return vaddr_map[filename] + + path = "%s/%s" % (symbol.SYMBOLS_DIR, filename) + if not os.path.isfile(path): + logging.warn('Cannot find %s for min_vaddr', filename) + vaddr_map[filename] = 0 + return 0 + + try: + # Use "-W" to have single-line format. + res = subprocess.check_output(['readelf', '-lW', path]) + lines = res.split("\n") + reading_headers = False + min_vaddr = None + min_fn = lambda x, y: y if x is None else min(x, y) + # Using counting loop for access to next line. + for i in range(0, len(lines) - 1): + line = lines[i].strip() + if reading_headers: + if line == "": + # Block is done, won't find anything else. + break + if line.startswith("LOAD"): + # Look at the current line to distinguish 32-bit from 64-bit + line_split = line.split() + if len(line_split) >= 8: + if " R E " in line: + # Found something expected. So parse VirtAddr. + try: + min_vaddr = min_fn(min_vaddr, int(line_split[2], 0)) + except ValueError: + pass + else: + logging.warn('Could not parse readelf line %s', line) + else: + if line.strip() == "Program Headers:": + reading_headers = True + + if min_vaddr is None: + min_vaddr = 0 + logging.debug("min_vaddr for %s is %d", filename, min_vaddr) + vaddr_map[filename] = min_vaddr + except subprocess.CalledProcessError: + logging.warn('Error finding min_vaddr for %s', filename) + vaddr_map[filename] = 0 + return vaddr_map[filename] + +unwind_symbols_cache = {} +unwind_symbols_warn_missing_cache = set() +def run_unwind_symbols(filename, offset_hex): + path = "%s/%s" % (symbol.SYMBOLS_DIR, filename) + if not os.path.isfile(path): + if path not in unwind_symbols_warn_missing_cache: + logging.warn('Cannot find %s for unwind_symbols', filename) + unwind_symbols_warn_missing_cache.add(path) + return None + + if (path, offset_hex) in unwind_symbols_cache: + pair = unwind_symbols_cache[(path, offset_hex)] + if pair is None: + return None + return [(pair[0], pair[1], filename)] + + try: + res = subprocess.check_output(['unwind_symbols', path, offset_hex]) + lines = res.split("\n") + for line in lines: + if line.startswith('<0x'): + parts = line.split(' ', 1) + if len(parts) == 2: + # Get offset, too. + offset = 0 + plus_index = parts[0].find('>+') + if plus_index > 0: + offset_str = parts[0][plus_index + 2:-1] + try: + offset = int(offset_str) + except ValueError: + logging.warn('error parsing offset from %s', parts[0]) + + # TODO C++ demangling necessary. + logging.debug('unwind_symbols: %s %s -> %s +%d', filename, offset_hex, parts[1], + offset) + sym = intern(parts[1]) + unwind_symbols_cache[(path, offset_hex)] = (sym, offset) + return [(sym, offset, filename)] + except subprocess.CalledProcessError: + logging.warn('Failed running unwind_symbols for %s', filename) + unwind_symbols_cache[(path, offset_hex)] = None + return None + + +def decode_with_symbol_lib(name, addr_rel_hex): + info = SymbolInformation(name, addr_rel_hex) + # As-is, only info[0] (inner-most inlined function) is recognized. + (source_symbol, source_location, object_symbol_with_offset) = info[0] + + def parse_symbol_lib_output(s): + i = s.rfind('+') + if i > 0: + try: + off = int(s[i+1:]) + return (s[0:i], off) + except ValueError: + pass + return (s, 0) + + ret = [] + + if object_symbol_with_offset is not None: + pair = parse_symbol_lib_output(object_symbol_with_offset) + ret.append((intern(pair[0]), pair[1], name)) + if source_symbol is not None: + iterinfo = iter(info) + next(iterinfo) + for (sym_inlined, loc_inlined, _) in iterinfo: + # TODO: Figure out what's going on here: + if sym_inlined is not None: + pair = parse_symbol_lib_output(sym_inlined) + ret.insert(0, (intern(pair[0]), pair[1], name)) + if len(ret) > 0: + return ret + return None + +def decode_addr(addr, mmap_state, device_symbols): + """Try to decode the given address against the current mmap table and device symbols. + + First, look up the address in the mmap state. If none is found, use a simple address + heuristic to guess kernel frames on 64-bit devices. + + Next, check on-device symbolization for a hit. + + Last, try to symbolize against host information. First try the symbol module. However, + as it is based on addr2line, it will not work for pure-gnu_debugdata DSOs (e.g., ART + preopt artifacts). For that case, use libunwindstack's unwind_symbols. + """ + + map = mmap_state.find(addr) + if map is None: + # If it looks large enough, assume it's from + # the kernel. + if addr > 18000000000000000000: + return [("[kernel]", 0, "[kernel]")] + return [("%d (no mapped segment)" % addr, 0, None)] + name = map[3] + logging.debug('%d is %s (%d +%d)', addr, name, map[0], map[1]) + + # Once relocation packer is off, it would be: + # offset = addr - map.start + map.pgoff + # Right now it is + # offset = addr - map.start (+ min_vaddr) + # Note that on-device symbolization doesn't include min_vaddr but + # does include pgoff. + offset = addr - map[0] + + if name in device_symbols: + offset = offset + map[2] + symbol = device_symbols[name].find(offset) + if symbol is None: + return [("%s (missing on-device symbol)" % (name), offset, name)] + else: + # TODO: Should we change the format? + return [(symbol, 0, name)] + offset = offset + find_vaddr(vaddr, name) + if (name, offset) in skip_dso: + # We already failed, skip symbol finding. + return [(name, offset, name)] + else: + addr_rel_hex = intern("%x" % offset) + ret = decode_with_symbol_lib(name, addr_rel_hex) + if ret is not None and len(ret) != 0: + # Addr2line may report oatexec+xyz. Let unwind_symbols take care of that. + if len(ret) != 1 or ret[0][0] != 'oatexec': + logging.debug('Got result from symbol module: %s', str(ret)) + return ret + # Try unwind_symbols + ret = run_unwind_symbols(name, addr_rel_hex) + if ret is not None and len(ret) != 0: + return ret + logging.warn("Failed to find symbol for %s +%d (%d)", name, offset, addr) + # Remember the fail. + skip_dso.add((name, offset)) + return [(name, offset, name)] + + +def print_sample(sample, tid_name_map): + if sample[0] in tid_name_map: + pid_name = "%s (%d)" % (tid_name_map[sample[0]], sample[0]) + elif sample[0] == 0: + pid_name = "kernel (0)" + else: + pid_name = "unknown (%d)" % (sample[0]) + if sample[1] in tid_name_map: + tid_name = "%s (%d)" % (tid_name_map[sample[1]], sample[1]) + elif sample[1] == 0: + tid_name = "kernel (0)" + else: + tid_name = "unknown (%d)" % (sample[1]) + print " %s - %s:" % (pid_name, tid_name) + for sym in sample[2]: + print " %s +%d (%s)" % (sym[0], sym[1], sym[2]) + +def print_samples(samples, tid_name_map): + for sample in samples: + print_sample(sample, tid_name_map) + +def symbolize_events(perf_data, device_symbols, tid_name_map, printSamples = False, + removeKernelTop = False): + samples = [] + mmap_states = {} + for event in perf_data.events: + update_mmap_states(event, mmap_states) + if event.HasField('sample_event'): + sample_ev = event.sample_event + # Handle sample. + new_sample = None + if sample_ev.pid in mmap_states: + mmap_state = mmap_states[sample_ev.pid] + ip_sym = decode_addr(sample_ev.ip, mmap_state, device_symbols) + stack = ip_sym + for cc_ip in sample_ev.callchain: + cc_sym = decode_addr(cc_ip, mmap_state, device_symbols) + stack.extend(cc_sym) + if removeKernelTop: + while len(stack) > 1 and stack[0][0] == "[kernel]": + stack.pop(0) + new_sample = (sample_ev.pid, sample_ev.tid, stack) else: - print indent('<Missing module>', 2) + # Handle kernel symbols specially. + if sample_ev.pid == 0: + samples.append((0, sample_ev.tid, [("[kernel]", 0, "[kernel]")])) + elif sample_ev.pid in tid_name_map: + samples.append((sample_ev.pid, sample_ev.tid, [(tid_name_map[sample_ev.pid], 0, + None)])) + else: + samples.append((sample_ev.pid, sample_ev.tid, [("[unknown]", 0, None)])) + if new_sample is not None: + samples.append(new_sample) + if printSamples: + print_sample(new_sample, tid_name_map) + return samples -def print_histogram(counters, size): +def count_key_reduce_function(x, y, key_fn): + key = key_fn(y) + if key not in x: + x[key] = 0 + x[key] = x[key] + 1 + return x + +def print_histogram(samples, reduce_key_fn, label_key_fn, size): # Create a sorted list of top samples. - counter_list = [] - for key, value in counters.iteritems(): - temp = (key,value) - counter_list.append(temp) - counter_list.sort(key=lambda counter: counter[1], reverse=True) + sorted_count_list = sorted( + reduce(lambda x, y: count_key_reduce_function(x, y, reduce_key_fn), samples, {}). + iteritems(), + cmp=lambda x,y: cmp(x[1], y[1]), + reverse=True) + sorted_count_topX = list(itertools.islice(sorted_count_list, size)) # Print top-size samples. print 'Histogram top-%d:' % (size) - for i in xrange(0, min(len(counter_list), size)): - print indent('%d: %s' % (i+1, counter_list[i]), 1) - -def print_modules(module_list): - print 'Modules:' - for module in module_list: - print indent(module.name, 1) - if module.HasField('build_id'): - print indent('Build ID: %s' % (module.build_id), 2) - print indent('Symbols:', 2) - for symbol in module.symbol: - print indent(symbol, 3) - -def print_process_names(process_names): - print 'Processes:' - for proc in process_names: - print indent(proc, 1) - -if profile.HasField('process_names'): - process_names = profile.process_names.name -else: - process_names = [] - -print_samples(module_list, profile.programs, process_names, counters) -print_modules(module_list) -print_histogram(counters, 100) -print_process_names(process_names) + for i in xrange(0, len(sorted_count_topX)): + print ' %d: %s (%s)' % (i+1, label_key_fn(sorted_count_topX[i][0]), + sorted_count_topX[i][1]) + +def get_name(pid): + if pid in tid_name_map: + return tid_name_map[pid] + if pid == 0: + return "[kernel]" + return "[unknown]" + +def create_cmd(args, f): + ret = ['python', '-u', 'system/extras/perfprofd/scripts/perf_proto_stack.py'] + if args.syms is not None: + ret.extend(['--syms', args.syms[0]]) + if args.print_samples is not None: + ret.append('--print-samples') + if args.skip_kernel_syms is not None: + ret.append('--skip-kernel-syms') + if args.print_pid_histogram is not None: + ret.append('--print-pid-histogram') + if args.print_sym_histogram is not None: + ret.append('--print-sym-histogram') + if args.print_dso_histogram is not None: + ret.append('--print-dso-histogram') + ret.extend(['--json-out', '%s.json' % (f)]) + ret.append(f) + return ret + +def run_cmd(x): + args = x[0] + f = x[1] + cmd = create_cmd(args,f) + logging.warn('Running on %s', f) + success = False + logging.debug('%r', cmd) + err_out = open('%s.err' % (f), 'w') + kill = lambda process: process.kill() + p = subprocess.Popen(cmd, stderr=err_out) + kill_timer = Timer(3600, kill, [p]) + try: + kill_timer.start() + stdout, stderr = p.communicate() + success = True + finally: + kill_timer.cancel() + logging.warn('Ended %s', f) + err_out.close() + return '%s: %r' % (f, success) + +def parallel_runner(args): + pool = ThreadPool(args.parallel) + map_args = map(lambda f: (args, f), args.file) + result = pool.map(run_cmd, map_args) + pool.close() + pool.join() + print result + +def run(args): + if args.syms is not None: + symbol.SYMBOLS_DIR = args.syms[0] + print_symbols = args.print_samples is not None + skip_kernel_syms = args.skip_kernel_syms is not None + + # TODO: accept argument for parsing. + file = open(args.file[0], 'rb') + data = file.read() + file.close() + + profile = perfprofd_record_pb2.PerfprofdRecord() + profile.ParseFromString(data) + + perf_data = profile.perf_data + + print "Stats: ", perf_data.stats + + tid_name_map = collect_tid_names(perf_data) + symbol_maps = create_symbol_maps(profile) + + samples = symbolize_events(perf_data, symbol_maps, tid_name_map, printSamples=print_symbols, + removeKernelTop=skip_kernel_syms) + + if args.print_pid_histogram is not None: + print_histogram(samples, lambda x: x[0], lambda x: get_name(x), 25) + if args.print_sym_histogram is not None: + print_histogram(samples, lambda x: x[2][0][0], lambda x: x, 100) + if args.print_dso_histogram is not None: + print_histogram(samples, lambda x: x[2][0][2], lambda x: x, 25) + + if args.json_out is not None: + json_file = open(args.json_out[0], 'w') + json_data = { 'samples': samples, 'names': tid_name_map } + json.dump(json_data, json_file) + json_file.close() + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Process a perfprofd record.') + + parser.add_argument('file', help='proto file to parse', metavar='file', nargs='+') + parser.add_argument('--syms', help='directory for symbols', nargs=1) + parser.add_argument('--json-out', help='output file for JSON', nargs=1) + parser.add_argument('--print-samples', help='print samples', action='store_const', const=True) + parser.add_argument('--skip-kernel-syms', help='skip kernel symbols at the top of stack', + action='store_const', const=True) + parser.add_argument('--print-pid-histogram', help='print a top-25 histogram of processes', + action='store_const', const=True) + parser.add_argument('--print-sym-histogram', help='print a top-100 histogram of symbols', + action='store_const', const=True) + parser.add_argument('--print-dso-histogram', help='print a top-25 histogram of maps', + action='store_const', const=True) + parser.add_argument('--parallel', help='run parallel jobs', type=int) + + args = parser.parse_args() + if args is not None: + if args.parallel is not None: + parallel_runner(args) + else: + run(args) diff --git a/perfprofd/scripts/sorted_collection.py b/perfprofd/scripts/sorted_collection.py new file mode 100644 index 00000000..315f7c89 --- /dev/null +++ b/perfprofd/scripts/sorted_collection.py @@ -0,0 +1,146 @@ +# Note: Taken from https://code.activestate.com/recipes/577197-sortedcollection/. +# +# Copyright 2010 Raymond Hettinger +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of +# this software and associated documentation files (the "Software"), to deal in the +# Software without restriction, including without limitation the rights to use, copy, +# modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, +# and to permit persons to whom the Software is furnished to do so, subject to the +# following conditions: +# +# The above copyright notice and this permission notice shall be included in all copies +# or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE +# FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +from bisect import bisect_left, bisect_right + +class SortedCollection(object): + def __init__(self, iterable=(), key=None): + self._given_key = key + key = (lambda x: x) if key is None else key + decorated = sorted((key(item), item) for item in iterable) + self._keys = [k for k, item in decorated] + self._items = [item for k, item in decorated] + self._key = key + + def _getkey(self): + return self._key + + def _setkey(self, key): + if key is not self._key: + self.__init__(self._items, key=key) + + def _delkey(self): + self._setkey(None) + + key = property(_getkey, _setkey, _delkey, 'key function') + + def clear(self): + self.__init__([], self._key) + + def copy(self): + return self.__class__(self, self._key) + + def __len__(self): + return len(self._items) + + def __getitem__(self, i): + return self._items[i] + + def __iter__(self): + return iter(self._items) + + def __reversed__(self): + return reversed(self._items) + + def __repr__(self): + return '%s(%r, key=%s)' % ( + self.__class__.__name__, + self._items, + getattr(self._given_key, '__name__', repr(self._given_key)) + ) + + def __reduce__(self): + return self.__class__, (self._items, self._given_key) + + def __contains__(self, item): + k = self._key(item) + i = bisect_left(self._keys, k) + j = bisect_right(self._keys, k) + return item in self._items[i:j] + + def index(self, item): + 'Find the position of an item. Raise ValueError if not found.' + k = self._key(item) + i = bisect_left(self._keys, k) + j = bisect_right(self._keys, k) + return self._items[i:j].index(item) + i + + def count(self, item): + 'Return number of occurrences of item' + k = self._key(item) + i = bisect_left(self._keys, k) + j = bisect_right(self._keys, k) + return self._items[i:j].count(item) + + def insert(self, item): + 'Insert a new item. If equal keys are found, add to the left' + k = self._key(item) + i = bisect_left(self._keys, k) + self._keys.insert(i, k) + self._items.insert(i, item) + + def insert_right(self, item): + 'Insert a new item. If equal keys are found, add to the right' + k = self._key(item) + i = bisect_right(self._keys, k) + self._keys.insert(i, k) + self._items.insert(i, item) + + def remove(self, item): + 'Remove first occurence of item. Raise ValueError if not found' + i = self.index(item) + del self._keys[i] + del self._items[i] + + def find(self, k): + 'Return first item with a key == k. Raise ValueError if not found.' + i = bisect_left(self._keys, k) + if i != len(self) and self._keys[i] == k: + return self._items[i] + raise ValueError('No item found with key equal to: %r' % (k,)) + + def find_le(self, k): + 'Return last item with a key <= k. Raise ValueError if not found.' + i = bisect_right(self._keys, k) + if i: + return self._items[i-1] + raise ValueError('No item found with key at or below: %r' % (k,)) + + def find_lt(self, k): + 'Return last item with a key < k. Raise ValueError if not found.' + i = bisect_left(self._keys, k) + if i: + return self._items[i-1] + raise ValueError('No item found with key below: %r' % (k,)) + + def find_ge(self, k): + 'Return first item with a key >= equal to k. Raise ValueError if not found' + i = bisect_left(self._keys, k) + if i != len(self): + return self._items[i] + raise ValueError('No item found with key at or above: %r' % (k,)) + + def find_gt(self, k): + 'Return first item with a key > k. Raise ValueError if not found' + i = bisect_right(self._keys, k) + if i != len(self): + return self._items[i] + raise ValueError('No item found with key above: %r' % (k,)) |