diff --git a/README.md b/README.md index abeb89bbcc40a37a8c947586021d6d443c7a2d33..431e559e2468a6159c508dbde77f335db5fbaba7 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,5 @@ A data framework encapsulating common data warehousing patterns and best practices, including: + - Defining and scheduling jobs locally, on Google Cloud Dataflow, and on Google Cloud ML Engine - Schemas on datasets and integrity of inputs and outputs - Versioned datasets in storage, never updated @@ -15,8 +16,20 @@ script/test script/distribute ``` +##### Generate your decryption key: + +``` +dojo encrypt +``` +or use a pre-existing key: +``` +$ export DOJO_DECRYPT_KEY=yourkey +``` + +##### Run your job + ``` -dojo --runner cloud --env production +dojo run --runner cloud --env production ``` ``` diff --git a/dojo/cli.py b/dojo/cli.py index 4be8eccfab0eb1e84a772fc5399b2ad3e8daa9ad..07cb84cc16c95d1c7df766d1bf87be7e8d08b041 100644 --- a/dojo/cli.py +++ b/dojo/cli.py @@ -2,16 +2,30 @@ import click import logging from .run import Entrypoint +from .secrets import Secrets -@click.command(help='Run a job') +@click.group() +def cli(): + pass + + +@cli.command(help='Run a dojo job') @click.argument('name') @click.option('--runner', default=None, help='specify a runner for the job') @click.option('--config', default='config', help='path to directory containing configuration files to be merged') @click.option('--env', default='development', help='environment used to select configuration and secrets') @click.pass_context -def cli(context, name, runner, config, env): +def run(context, name, runner, config, env): if context.obj is None: context.obj = {} logging.getLogger().setLevel(logging.INFO) Entrypoint().run(name, runner, config, env) + + +@cli.command(help='Encrypt secrets') +@click.option('--config', default='config', help='path to directory containing configuration files to be merged') +@click.option('--env', default='development', help='environment used to select configuration and secrets') 
+@click.pass_context +def encrypt(context, config, env): + Secrets().encrypt(config, env) diff --git a/dojo/run.py b/dojo/run.py index ba88cf3cd7a7147a754b3d6f6f3c4ed36ee95984..99c21daa62841200b8351c1556e4370f65e2b621 100644 --- a/dojo/run.py +++ b/dojo/run.py @@ -2,12 +2,12 @@ from __future__ import absolute_import, print_function, unicode_literals import os import yaml -import json import importlib import fnmatch -import subprocess from datetime import datetime +from .secrets import Secrets + from .util import deep_merge @@ -30,30 +30,9 @@ class Entrypoint(object): env_config = self._read_yaml(env_config_path) or {} config = deep_merge(base_config, env_config) - # Build secrets by decrypting available EJSONs. - env_ejson_secrets_path = os.path.join(config, 'secrets.%s.ejson' % (env, )) - env_ejson_secrets = self._read_json(env_ejson_secrets_path) - ejson_public_key = env_ejson_secrets['_public_key'] - ejson_private_key_path = os.path.join('/opt/ejson/keys/%s' % (ejson_public_key, )) - if not os.path.isfile(ejson_private_key_path): - if not os.environ.get('EJSON_PRIVATE_KEY'): - raise ValueError('ENV[EJSON_PRIVATE_KEY] must be set or %s must exist containing it.' % (ejson_private_key_path, )) - else: - ejson_private_key_dir = os.path.dirname(ejson_private_key_path) - if not os.path.exists(ejson_private_key_dir): - os.makedirs(ejson_private_key_dir) - with open(ejson_private_key_path, 'w') as f: - f.write(os.environ['EJSON_PRIVATE_KEY']) - print('%s written.' 
% (ejson_private_key_path, )) - try: - out = subprocess.check_output(['ejson', 'decrypt', env_ejson_secrets_path], stderr=subprocess.STDOUT) - out = '\n'.join([line for line in out.split('\n') if not line.startswith('warning:')]) - except subprocess.CalledProcessError as e: - raise ValueError(e.output) - try: - secrets = json.loads(out) - except ValueError as e: - raise ValueError(e, out) + # Build secrets by decrypting available JSONs + env_json_secrets_path = os.path.join(base_config_path, 'secrets.%s.json.enc' % (env, )) + secrets = Secrets.decrypt(env_json_secrets_path) # Build the job. job = self._build_job(name, config, secrets, runner) @@ -102,10 +81,10 @@ class Entrypoint(object): return job_class(job_config, job_secrets) - def _read_json(self, path): + def _read_file(self, path): if os.path.isfile(path): with open(path, 'r') as f: - return json.loads(f.read()) + return f.read() else: return {} diff --git a/dojo/secrets.py b/dojo/secrets.py new file mode 100644 index 0000000000000000000000000000000000000000..ee4b09aaeaa8b615cac7b44c7e7208b44647ab06 --- /dev/null +++ b/dojo/secrets.py @@ -0,0 +1,59 @@ +from __future__ import absolute_import, print_function, unicode_literals + +import os +import json +import click + +from cryptography.fernet import Fernet + + +class Secrets(object): + + def encrypt(self, config, env): + encrypt_key = os.environ.get('DOJO_DECRYPT_KEY') + if not encrypt_key and click.confirm('No key found in your environment variables. Would you like to generate a key?'): + encrypt_key = Fernet.generate_key() + click.echo(click.style('Your Key: %s (keep this safe)', fg='red', bold=True) % encrypt_key) + env_json_secrets_path = os.path.join(config, 'secrets.%s.json' % (env, )) + env_json_secrets = self._read_file(env_json_secrets_path) + if not env_json_secrets: + raise ValueError('File %s does not exist or is empty.' 
% env_json_secrets_path) + else: + fernet = Fernet(encrypt_key) + token = fernet.encrypt(env_json_secrets) + with open(env_json_secrets_path + '.enc', 'w') as encrypted_file: + encrypted_file.write(token) + return encrypted_file + + def decrypt(self, json_secrets_path): + encrypt_key = os.environ.get('DOJO_DECRYPT_KEY') + if not encrypt_key or not json_secrets_path: + raise ValueError('Missing a requirement for decrypting. Secrets path or DOJO_DECRYPT_KEY.') + secrets_file = self._read_file(json_secrets_path) + + # If there aren't any encrypted files, try loading the unencrypted file instead + if os.path.isfile(json_secrets_path.split('.enc')[0]) and not os.path.isfile(json_secrets_path): + try: + secrets = json.loads(open(json_secrets_path.split('.enc')[0]).read()) + except ValueError as e: + raise ValueError(e) + else: + try: + fernet = Fernet(encrypt_key) + out = fernet.decrypt(secrets_file.encode()) + # TODO proper error handling? https://cryptography.io/en/latest/fernet/#cryptography.fernet.Fernet.decrypt + except ValueError as e: + raise ValueError(e) + try: + secrets = json.loads(out) + except ValueError as e: + raise ValueError(e, out) + + return secrets + + def _read_file(self, path): + if os.path.isfile(path): + with open(path, 'r') as f: + return f.read() + else: + return {} diff --git a/setup.py b/setup.py index 0a746f449df545562e8401c7beab2f4f90dadc10..a7b477b35a822511e0e59a0befd1011eec19f80e 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ from setuptools import setup, find_packages setup( name='dojo', - version='0.0.40', + version='0.0.41', description='A framework for building and running your data platform.', author='Data Up', author_email='dojo@dataup.me', @@ -15,6 +15,7 @@ setup( install_requires=[ 'pyyaml', 'jsonschema', + 'cryptography', 'python-dateutil', 'click', ], diff --git a/tests/conftest.py b/tests/conftest.py index bc673a42fddc9e2b27b641da6bd67fa2394f084e..64144b9306bfcc35841b1a293d8122fa0a6907db 100644 --- a/tests/conftest.py +++ 
b/tests/conftest.py @@ -8,3 +8,11 @@ import tempfile def temp_dir(): with tempfile.TemporaryDirectory() as d: yield d + + +@pytest.fixture(scope='session') +def temp_config(tmpdir_factory): + config_dir = tmpdir_factory.mktemp('config') + json_file = config_dir.join('secrets.development.json') + json_file.write('{"some":"test"}') + return config_dir diff --git a/tests/test_secrets.py b/tests/test_secrets.py new file mode 100644 index 0000000000000000000000000000000000000000..82924cd03e2c12a01682292c4541895f3c6d5dd0 --- /dev/null +++ b/tests/test_secrets.py @@ -0,0 +1,29 @@ +from __future__ import absolute_import, print_function, unicode_literals + +import os + +from dojo.secrets import Secrets + + +class TestSecrets(object): + + def test_encrypt(self, temp_config, monkeypatch): + monkeypatch.setenv('DOJO_DECRYPT_KEY', 'Xwti2BVVb-ReShrBCI0A0C54x0yc8zmagz39gCsw0kA=') + encrypted_file = Secrets().encrypt(temp_config.strpath, 'development') + file = open(encrypted_file.name, 'r').read() + temp_file = os.path.join(temp_config.strpath, 'secrets.development.json.enc') + assert os.path.isfile(temp_file) + assert len(file) == 100 + + def test_decrypt(self, temp_config, monkeypatch): + monkeypatch.setenv('DOJO_DECRYPT_KEY', 'Xwti2BVVb-ReShrBCI0A0C54x0yc8zmagz39gCsw0kA=') + json_secrets_path = os.path.join(temp_config.strpath, 'secrets.development.json.enc') + secrets = Secrets().decrypt(json_secrets_path) + assert secrets == {'some': 'test'} + + def test_decrypt_no_enc(self, temp_config, monkeypatch): + monkeypatch.setenv('DOJO_DECRYPT_KEY', 'Xwti2BVVb-ReShrBCI0A0C54x0yc8zmagz39gCsw0kA=') + json_secrets_path = os.path.join(temp_config.strpath, 'secrets.development.json.enc') + os.remove(json_secrets_path) + secrets = Secrets().decrypt(json_secrets_path) + assert secrets == {'some': 'test'}