diff options
Diffstat (limited to 'utils/frozen_chromite/third_party/infra_libs/ts_mon/config.py')
-rw-r--r-- | utils/frozen_chromite/third_party/infra_libs/ts_mon/config.py | 251 |
1 files changed, 251 insertions, 0 deletions
# Copyright 2015 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Command-line flags and machine config handling for ts_mon monitoring."""

import json
import logging
import os
import socket
import sys
import re

import requests

from infra_libs.ts_mon.common import interface
from infra_libs.ts_mon.common import monitors
from infra_libs.ts_mon.common import standard_metrics
from infra_libs.ts_mon.common import targets


# URL queried to find out which zone a GCE instance is running in.
_GCE_ZONE_METADATA_URL = (
    'http://metadata.google.internal/computeMetadata/v1/instance/zone')

# Regular expression that matches the vast majority of our host names.
# Matches everything of the form 'masterN', 'masterNa', and 'foo-xN'.
# Group 2 captures N.
_HOST_NETWORK_RE = re.compile(r'^([\w-]*?-[acm]|master)(\d+)a?$')


def load_machine_config(filename):
  """Load the machine-wide JSON configuration file.

  Args:
    filename (str): path to the JSON config file.

  Returns:
    The decoded JSON content, or an empty dict if the file does not exist.

  Raises:
    Exception: whatever open() or json.load() raised while reading an
        existing file. The failure is logged before being re-raised.
  """
  if not os.path.exists(filename):
    logging.info('Configuration file does not exist, ignoring: %s', filename)
    return {}

  try:
    with open(filename) as fh:
      return json.load(fh)
  except Exception:
    logging.error('Configuration file couldn\'t be read: %s', filename)
    raise


def _default_region(fqdn):
  """Guess the region this machine lives in.

  The GCE metadata server is asked first (with a one second timeout). If it
  does not answer with an OK status, the region is taken from the fully
  qualified domain name instead.

  Args:
    fqdn (str): fully qualified domain name of this machine.

  Returns:
    The GCE zone name, or the second dot-separated component of fqdn, or ''
    if fqdn has no such component.
  """
  # Check if we're running in a GCE instance.
  try:
    response = requests.get(
        _GCE_ZONE_METADATA_URL,
        headers={'Metadata-Flavor': 'Google'},
        timeout=1.0)
  except requests.exceptions.RequestException:
    # Not on GCE (or metadata server unreachable): fall back to the fqdn.
    pass
  else:
    if response.status_code == requests.codes.ok:
      # The zone is the last slash-separated component.
      return response.text.split('/')[-1]

  components = fqdn.split('.')
  if len(components) < 2:
    return ''
  return components[1]  # [chrome|golo]


def _default_network(host):
  """Extract the network number from a host name.

  Args:
    host (str): short host name, e.g. 'masterN', 'masterNa' or 'foo-xN'.

  Returns:
    The digits N as a string, or '' if the host name does not match the
    expected pattern.
  """
  match = _HOST_NETWORK_RE.match(host)
  if match is None:
    return ''
  return match.group(2)  # N


def add_argparse_options(parser):
  """Add monitoring related flags to a process' argument parser.

  Computing the flag defaults looks up the local fully qualified domain name
  and may issue a request to the GCE metadata server (see _default_region).

  Args:
    parser (argparse.ArgumentParser): the parser for the main process.
  """
  if sys.platform == 'win32':  # pragma: no cover
    default_config_file = 'C:\\chrome-infra\\ts-mon.json'
  else:  # pragma: no cover
    default_config_file = '/etc/chrome-infra/ts-mon.json'

  parser = parser.add_argument_group('Timeseries Monitoring Options')
  parser.add_argument(
      '--ts-mon-config-file',
      default=default_config_file,
      help='path to a JSON config file that contains suitable values for '
           '"endpoint" and "credentials" for this machine. This config file is '
           'intended to be shared by all processes on the machine, as the '
           'values depend on the machine\'s position in the network, IP '
           'whitelisting and deployment of credentials. (default: %(default)s)')
  parser.add_argument(
      '--ts-mon-endpoint',
      help='url (file:// or https://) to post monitoring metrics to. If set, '
           'overrides the value in --ts-mon-config-file')
  parser.add_argument(
      '--ts-mon-credentials',
      help='path to a pkcs8 json credential file. If set, overrides the value '
           'in --ts-mon-config-file')
  parser.add_argument(
      '--ts-mon-ca-certs',
      help='path to file containing root CA certificates for SSL server '
           'certificate validation. If not set, a CA cert file bundled with '
           'httplib2 is used.')
  parser.add_argument(
      '--ts-mon-flush',
      choices=('manual', 'auto'), default='auto',
      help=('metric push behavior: manual (only send when flush() is called), '
            'or auto (send automatically every --ts-mon-flush-interval-secs '
            'seconds). (default: %(default)s)'))
  parser.add_argument(
      '--ts-mon-flush-interval-secs',
      type=int,
      default=60,
      help=('automatically push metrics on this interval if '
            '--ts-mon-flush=auto.'))
  parser.add_argument(
      '--ts-mon-autogen-hostname',
      action='store_true',
      help=('Indicate that the hostname is autogenerated. '
            'This option must be set on autoscaled GCE VMs, Kubernetes pods, '
            'or any other hosts with dynamically generated names.'))

  parser.add_argument(
      '--ts-mon-target-type',
      choices=('device', 'task'),
      default='device',
      help='the type of target that is being monitored ("device" or "task").'
           ' (default: %(default)s)')

  fqdn = socket.getfqdn().lower()  # foo-[a|m]N.[chrome|golo].chromium.org
  host = fqdn.split('.')[0]  # foo-[a|m]N
  region = _default_region(fqdn)
  network = _default_network(host)

  parser.add_argument(
      '--ts-mon-device-hostname',
      default=host,
      help='name of this device, (default: %(default)s)')
  parser.add_argument(
      '--ts-mon-device-region',
      default=region,
      help='name of the region this devices lives in. (default: %(default)s)')
  parser.add_argument(
      '--ts-mon-device-role',
      default='default',
      help='Role of the device. (default: %(default)s)')
  parser.add_argument(
      '--ts-mon-device-network',
      default=network,
      help='name of the network this device is connected to. '
           '(default: %(default)s)')

  parser.add_argument(
      '--ts-mon-task-service-name',
      help='name of the service being monitored')
  parser.add_argument(
      '--ts-mon-task-job-name',
      help='name of this job instance of the task')
  parser.add_argument(
      '--ts-mon-task-region',
      default=region,
      help='name of the region in which this task is running '
           '(default: %(default)s)')
  parser.add_argument(
      '--ts-mon-task-hostname',
      default=host,
      help='name of the host on which this task is running '
           '(default: %(default)s)')
  parser.add_argument(
      '--ts-mon-task-number', type=int, default=0,
      help='number (e.g. for replication) of this instance of this task '
           '(default: %(default)s)')

  parser.add_argument(
      '--ts-mon-metric-name-prefix',
      default='/chrome/infra/',
      help='metric name prefix for all metrics (default: %(default)s)')

  parser.add_argument(
      '--ts-mon-use-new-proto',
      default=True, action='store_true',
      help='deprecated and ignored')


def _exit_with_usage_error(message):
  """Write message to stderr and exit with status 2.

  Stands in for ArgumentParser.error, since the parser is not available
  here. Uses sys.stderr.write rather than the Python-2-only
  `print >> sys.stderr` statement so it works on both Python 2 and 3.
  """
  sys.stderr.write(message + '\n')
  sys.exit(2)


def _target_hostname(hostname, autogenerated):
  """Return hostname, prefixed with 'autogen:' if it is autogenerated."""
  if autogenerated:
    return 'autogen:' + hostname
  return hostname


def process_argparse_options(args):
  """Process command line arguments to initialize the global monitor.

  Also initializes the default target.

  Starts a background thread to automatically flush monitoring metrics if not
  disabled by command line arguments.

  Exits the process with status 2 if the target type is "task" and the
  service name or job name is missing.

  Args:
    args (argparse.Namespace): the result of parsing the command line arguments
  """
  # Parse the config file if it exists.
  config = load_machine_config(args.ts_mon_config_file)
  endpoint = config.get('endpoint', '')
  credentials = config.get('credentials', '')
  # The hostname is autogenerated if either the flag or the config says so.
  autogenerated = (
      args.ts_mon_autogen_hostname or config.get('autogen_hostname', False))

  # Command-line args override the values in the config file.
  if args.ts_mon_endpoint is not None:
    endpoint = args.ts_mon_endpoint
  if args.ts_mon_credentials is not None:
    credentials = args.ts_mon_credentials

  if args.ts_mon_target_type == 'device':
    interface.state.target = targets.DeviceTarget(
        args.ts_mon_device_region,
        args.ts_mon_device_role,
        args.ts_mon_device_network,
        _target_hostname(args.ts_mon_device_hostname, autogenerated))
  elif args.ts_mon_target_type == 'task':
    if not args.ts_mon_task_service_name:
      _exit_with_usage_error(
          'Argument --ts-mon-task-service-name must be '
          'provided when the target type is "task".')
    if not args.ts_mon_task_job_name:
      _exit_with_usage_error(
          'Argument --ts-mon-task-job-name must be provided '
          'when the target type is "task".')
    interface.state.target = targets.TaskTarget(
        args.ts_mon_task_service_name,
        args.ts_mon_task_job_name,
        args.ts_mon_task_region,
        _target_hostname(args.ts_mon_task_hostname, autogenerated),
        args.ts_mon_task_number)

  interface.state.metric_name_prefix = args.ts_mon_metric_name_prefix
  # Default to a monitor that does nothing; replaced below if the endpoint
  # names a supported scheme.
  interface.state.global_monitor = monitors.NullMonitor()

  if endpoint.startswith('file://'):
    interface.state.global_monitor = monitors.DebugMonitor(
        endpoint[len('file://'):])
  elif endpoint.startswith('https://'):
    interface.state.global_monitor = monitors.HttpsMonitor(
        endpoint, monitors.CredentialFactory.from_string(credentials),
        ca_certs=args.ts_mon_ca_certs)
  elif endpoint.lower() == 'none' or not endpoint:
    logging.info('ts_mon monitoring has been explicitly disabled')
  else:
    logging.error('ts_mon monitoring is disabled because the endpoint provided'
                  ' is invalid or not supported: %s', endpoint)

  interface.state.flush_mode = args.ts_mon_flush

  if args.ts_mon_flush == 'auto':
    interface.state.flush_thread = interface._FlushThread(
        args.ts_mon_flush_interval_secs)
    interface.state.flush_thread.start()

  standard_metrics.init()