Add support for virtual environments (#392)

* Initial virtual env support

* Make import ignoring part of model

* Add migrations

* Improve stdout/stderr behavior

* Fix stderr append

* Move venv setup to separate function

* Handle pip setup

* unit tests

* Add more settings and help text

* maybe windows

* windows stuff

* win

* dows

* more windows....

* Add test case for running script in venv

* test setup

* Remove debug print
This commit is contained in:
Chris Mitchell 2023-11-30 18:50:31 -05:00 committed by GitHub
parent b113ef6b4a
commit b1b74e8276
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
18 changed files with 450 additions and 63 deletions

View File

@ -1,4 +1,4 @@
FROM python:3.7
FROM python:3.11
ARG HOST_USER=1000
ENV HOST_USER=${HOST_USER}
@ -14,7 +14,7 @@ ENV BUILD_DIR=${BUILD_DIR}
WORKDIR ${BUILD_DIR}
RUN chown wooey:wooey ${BUILD_DIR}
RUN pip install docker psycopg2
RUN pip install docker psycopg2 redis
COPY --chown=wooey:wooey setup.py MANIFEST.in Makefile README.md ${BUILD_DIR}/
COPY --chown=wooey:wooey scripts ${BUILD_DIR}/scripts

View File

@ -12,6 +12,7 @@ services:
- 8081:8080
depends_on:
- rabbit
- redis
- db
- celery
command: ./run-server
@ -22,6 +23,7 @@ services:
service: common
depends_on:
- rabbit
- redis
- db
command: watchmedo auto-restart --directory=$BUILD_DIR/wooey --recursive --ignore-patterns="*.pyc" -- celery -A $WOOEY_PROJECT worker -c 4 -B -l debug -s schedule
@ -38,3 +40,6 @@ services:
POSTGRES_USER: wooey
POSTGRES_PASSWORD: wooey
POSTGRES_DB: wooey
redis:
image: redis:7.2.3

View File

@ -6,15 +6,16 @@ WOOEY_ALLOW_ANONYMOUS = True
WOOEY_ENABLE_API_KEYS = True
## Celery related options
WOOEY_REALTIME_CACHE = "default"
CACHES = {
"default": {
"BACKEND": "django.core.cache.backends.redis.RedisCache",
"LOCATION": "redis://redis:6379",
}
}
## Celery related options
WOOEY_CELERY = True
broker_url = "amqp://guest@rabbit"
task_track_started = True
worker_send_task_events = True
imports = ("wooey.tasks",)
task_serializer = "json"
task_acks_late = True
# the directory for uploads (physical directory)
MEDIA_ROOT = os.path.join(BASE_DIR, "user_uploads") # noqa: F405

View File

@ -20,7 +20,7 @@ setup(
python_requires=">3.5.0",
install_requires=[
"celery>=4,<6",
"clinto>=0.3.0",
"clinto>=0.5.1",
"Django>=3,<5",
"django-autoslug",
"django-storages",

View File

@ -1,8 +1,11 @@
from __future__ import absolute_import
import os
import sys
from django.contrib.admin import ModelAdmin, site, TabularInline
from wooey import settings as wooey_settings
from .models import (
Script,
ScriptVersion,
@ -13,6 +16,7 @@ from .models import (
UserFile,
WooeyJob,
WooeyWidget,
VirtualEnvironment,
)
@ -117,6 +121,14 @@ class FileAdmin(ModelAdmin):
pass
class VirtualEnvironmentAdmin(ModelAdmin):
    """Admin configuration for the VirtualEnvironment model."""

    def get_changeform_initial_data(self, request):
        """Return the initial values used to pre-fill the admin add form.

        The interpreter running this process and the configured virtual
        environment directory are offered as defaults. Initial data produced
        by the base ModelAdmin implementation is layered on top of those
        defaults, so it takes precedence when both provide the same key.
        """
        initial = {
            "python_binary": sys.executable,
            "venv_directory": wooey_settings.WOOEY_VIRTUAL_ENVIRONMENT_DIRECTORY,
        }
        # Keep whatever the base implementation supplies instead of silently
        # discarding it by overriding the hook outright.
        initial.update(super().get_changeform_initial_data(request))
        return initial
site.register(WooeyWidget)
site.register(WooeyJob, JobAdmin)
site.register(UserFile, FileAdmin)
@ -126,3 +138,4 @@ site.register(ScriptGroup, GroupAdmin)
site.register(ScriptParameterGroup, ParameterGroupAdmin)
site.register(ScriptParser, ScriptParserAdmin)
site.register(ScriptVersion, ScriptVersionAdmin)
site.register(VirtualEnvironment, VirtualEnvironmentAdmin)

View File

@ -1,4 +1,5 @@
from django import forms
from django.utils.translation import gettext_lazy as _
class SubmitForm(forms.Form):
@ -12,8 +13,19 @@ class SubmitForm(forms.Form):
class AddScriptForm(forms.Form):
    """Options accepted when a script is added or updated."""

    group = forms.CharField(required=False)
    default = forms.NullBooleanField(required=False)
    ignore_bad_imports = forms.BooleanField(
        required=False,
        help_text=_(
            "Ignore bad imports when adding scripts. This is useful if a script is under a virtual environment."
        ),
    )

    def clean_default(self):
        """Return the cleaned ``default`` value, substituting True for None."""
        chosen = self.cleaned_data["default"]
        return True if chosen is None else chosen

    def clean_ignore_bad_imports(self):
        """Return the cleaned ``ignore_bad_imports`` value, substituting False for None."""
        chosen = self.cleaned_data["ignore_bad_imports"]
        return False if chosen is None else chosen

View File

@ -217,6 +217,7 @@ def add_or_update_script(request):
"group": group,
"script_name": script_name,
"set_default_version": data["default"],
"ignore_bad_imports": data["ignore_bad_imports"],
}
results = utils.add_wooey_script(**add_kwargs)
output = {

View File

@ -90,9 +90,13 @@ def purge_output(job=None):
user_file.delete()
def get_job_commands(job=None):
def get_job_commands(job=None, executable=None):
script_version = job.script_version
com = [sys.executable] if sys.executable else []
com = (
[executable]
if executable is not None
else ([sys.executable] if sys.executable else [])
)
com.extend([script_version.get_script_path()])
parameters = job.get_parameters()
@ -330,7 +334,9 @@ def add_wooey_script(
group=None,
script_name=None,
set_default_version=True,
ignore_bad_imports=False,
):
# There is a class called 'Script' which contains the general information about a script. However, that is not where the file details
# of the script lie. That is the ScriptVersion model. This allows the end user to tag a script as a favorite/etc. and set
# information such as script descriptions/names that do not constantly need to be updated with every version change. Thus,
@ -444,7 +450,11 @@ def add_wooey_script(
basename, extension = os.path.splitext(script)
filename = os.path.split(basename)[1]
parser = Parser(script_name=filename, script_path=local_storage.path(local_file))
parser = Parser(
script_name=filename,
script_path=local_storage.path(local_file),
ignore_bad_imports=ignore_bad_imports,
)
if not parser.valid:
return {
"valid": False,
@ -470,6 +480,7 @@ def add_wooey_script(
script_kwargs = {
"script_group": script_group,
"script_name": script_name or script_schema["name"],
"ignore_bad_imports": ignore_bad_imports,
}
version_kwargs = {
"script_version": version_string,

View File

@ -28,6 +28,11 @@ class Command(BaseCommand):
default=None,
help="The name of the script. Default: None (uses the filename)",
)
parser.add_argument(
"--ignore-bad-imports",
action="store_true",
help="Ignore failed imports. Useful when importing into a VirtualEnv",
)
parser.add_argument(
"--update", dest="update", action="store_true", help=argparse.SUPPRESS
)
@ -48,6 +53,7 @@ class Command(BaseCommand):
if not os.path.exists(script):
raise CommandError("{0} does not exist.".format(script))
group = options.get("group", wooey_settings.WOOEY_DEFAULT_SCRIPT_GROUP)
ignore_bad_imports = options.get("ignore_bad_imports")
scripts = (
[os.path.join(script, i) for i in os.listdir(script)]
if os.path.isdir(script)
@ -84,6 +90,7 @@ class Command(BaseCommand):
"script_path": script,
"group": group,
"script_name": base_name,
"ignore_bad_imports": ignore_bad_imports,
}
res = add_wooey_script(**add_kwargs)
if res["valid"]:

View File

@ -0,0 +1,45 @@
# Generated by Django 3.2.23 on 2023-11-22 02:05
from django.db import migrations, models
import django.db.models.deletion
# Schema migration that creates the VirtualEnvironment model and links Script to it.
class Migration(migrations.Migration):
# Depends on the wooey migration "0050_add_api_keys".
dependencies = [
("wooey", "0050_add_api_keys"),
]
operations = [
# Create the VirtualEnvironment model: name, interpreter, requirements and target directory.
migrations.CreateModel(
name="VirtualEnvironment",
fields=[
(
"id",
models.BigAutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
("name", models.CharField(max_length=25)),
("python_binary", models.CharField(max_length=1024)),
# requirements is the only field declared nullable/blank here.
("requirements", models.TextField(null=True, blank=True)),
("venv_directory", models.CharField(max_length=1024)),
],
options={
"verbose_name": "virtual environment",
"verbose_name_plural": "virtual environments",
},
),
# Add a nullable foreign key from Script to VirtualEnvironment; on delete the key is set to NULL.
# NOTE(review): the Script model field in this change also passes blank=True, which is not
# recorded here - confirm whether a follow-up migration is expected.
migrations.AddField(
model_name="script",
name="virtual_environment",
field=models.ForeignKey(
null=True,
on_delete=django.db.models.deletion.SET_NULL,
to="wooey.virtualenvironment",
),
),
]

View File

@ -0,0 +1,21 @@
# Generated by Django 3.2.23 on 2023-11-22 22:37
from django.db import migrations, models
# Schema migration that adds the ignore_bad_imports flag to Script.
class Migration(migrations.Migration):
# Depends on the wooey migration "0051_add_virtual_env".
dependencies = [
("wooey", "0051_add_virtual_env"),
]
operations = [
# New boolean field on Script with a default of False.
migrations.AddField(
model_name="script",
name="ignore_bad_imports",
field=models.BooleanField(
default=False,
help_text="Ignore bad imports when adding scripts. This is useful if a script is under a virtual environment.",
),
),
]

View File

@ -59,6 +59,12 @@ class Script(models.Model):
script_order = models.PositiveSmallIntegerField(default=1)
is_active = models.BooleanField(default=True)
user_groups = models.ManyToManyField(Group, blank=True)
ignore_bad_imports = models.BooleanField(
default=False,
help_text=_(
"Ignore bad imports when adding scripts. This is useful if a script is under a virtual environment."
),
)
execute_full_path = models.BooleanField(
default=True
@ -70,6 +76,9 @@ class Script(models.Model):
help_text="By default save to the script name,"
" this will change the output folder.",
)
virtual_environment = models.ForeignKey(
"VirtualEnvironment", on_delete=models.SET_NULL, null=True, blank=True
)
created_date = models.DateTimeField(auto_now_add=True)
modified_date = models.DateTimeField(auto_now=True)
@ -259,10 +268,13 @@ class WooeyJob(models.Model):
param.recreate()
param.save()
self.status = self.SUBMITTED
rerun = kwargs.pop("rerun", False)
if rerun:
self.command = ""
self.save()
task_kwargs = {"wooey_job": self.pk, "rerun": kwargs.pop("rerun", False)}
task_kwargs = {"wooey_job": self.pk, "rerun": rerun}
if task_kwargs.get("rerun"):
if rerun:
utils.purge_output(job=self)
if wooey_settings.WOOEY_CELERY:
transaction.on_commit(lambda: tasks.submit_script.delay(**task_kwargs))
@ -717,3 +729,51 @@ class WooeyFile(models.Model):
def __str__(self):
return self.filepath.name
class VirtualEnvironment(models.Model):
    """Definition of a virtual environment: base interpreter, requirements and location."""

    name = models.CharField(
        max_length=25, help_text=_("The name of the virtual environment.")
    )
    python_binary = models.CharField(
        max_length=1024,
        help_text=_(
            'The binary to use for creating the virtual environment. Should be in your path (e.g. "python3" or "/usr/bin/python3")'
        ),
    )
    requirements = models.TextField(
        null=True,
        blank=True,
        help_text=_(
            'A list of requirements for the virtualenv. This gets passed directly to "pip install -r".'
        ),
    )
    venv_directory = models.CharField(
        max_length=1024,
        help_text=_("The directory to place the virtual environment under."),
    )

    class Meta:
        app_label = "wooey"
        verbose_name = _("virtual environment")
        verbose_name_plural = _("virtual environments")

    def get_venv_python_binary(self):
        """Return the path of the Python interpreter inside the install path.

        Uses ``Scripts/python.exe`` when ``wooey_settings.IS_WINDOWS`` is set
        and ``bin/python`` otherwise.
        """
        if wooey_settings.IS_WINDOWS:
            bin_dir, interpreter = "Scripts", "python.exe"
        else:
            bin_dir, interpreter = "bin", "python"
        return os.path.join(self.get_install_path(), bin_dir, interpreter)

    def get_install_path(self, ensure_exists=False):
        """Return ``<venv_directory>/<alphanumeric chars of python_binary>/<name>``.

        When ``ensure_exists`` is true the directory is created if missing.
        """
        # Only the alphanumeric characters of the binary are kept for the
        # intermediate directory name.
        binary_slug = "".join(filter(str.isalnum, self.python_binary))
        install_path = os.path.join(self.venv_directory, binary_slug, self.name)
        if ensure_exists:
            os.makedirs(install_path, exist_ok=True)
        return install_path

    def __str__(self):
        return self.name

View File

@ -1,4 +1,8 @@
__author__ = "chris"
import os
import tempfile
from django.conf import settings
from django.utils.translation import gettext_lazy as _
@ -11,6 +15,8 @@ def get(key, default):
return getattr(settings, key, default)
IS_WINDOWS = os.name == "nt"
# AUTH based settings
WOOEY_ALLOW_ANONYMOUS = get("WOOEY_ALLOW_ANONYMOUS", True)
WOOEY_AUTH = get("WOOEY_AUTH", True)
@ -37,3 +43,8 @@ WOOEY_SCRIPT_DIR = get("WOOEY_SCRIPT_DIR", "wooey_scripts")
WOOEY_SHOW_LOCKED_SCRIPTS = get("WOOEY_SHOW_LOCKED_SCRIPTS", True)
WOOEY_SITE_NAME = get("WOOEY_SITE_NAME", _("Wooey!"))
WOOEY_SITE_TAG = get("WOOEY_SITE_TAG", _("A web UI for Python scripts"))
# Virtual Environment Settings
WOOEY_VIRTUAL_ENVIRONMENT_DIRECTORY = get(
"WOOEY_VIRTUAL_ENVIRONMENT_DIRECTORY", tempfile.gettempdir()
)

View File

@ -97,7 +97,9 @@ def script_version_postsave(instance, created, **kwargs):
not skip_script(instance) or getattr(instance, "_script_upgrade", False)
):
res = utils.add_wooey_script(
script_version=instance, group=instance.script.script_group
script_version=instance,
group=instance.script.script_group,
ignore_bad_imports=instance.script.ignore_bad_imports,
)
instance._script_upgrade = False
instance._script_cl_creation = False

View File

@ -93,6 +93,99 @@ def get_latest_script(script_version):
return False
def run_and_stream_command(command, cwd=None, job=None, stdout="", stderr=""):
    """Run ``command`` and accumulate its output, publishing updates to ``job``.

    Output read from the process is folded into the ``stdout``/``stderr``
    values passed in, so callers can chain several commands into one log.
    Whenever the accumulated output changes and ``job`` is not None,
    ``job.update_realtime`` is called with the current values.

    Returns a ``(stdout, stderr, return_code)`` tuple.
    """
    process = subprocess.Popen(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        cwd=cwd,
        bufsize=0,
    )

    # The streams are watched by separate monitors that communicate through
    # queues; reading them directly would let a wait on one stream block the
    # other.
    out_queue, err_queue = Queue(), Queue()
    out_monitor = output_monitor_queue(out_queue, process.stdout)
    err_monitor = output_monitor_queue(err_queue, process.stderr)

    last_published = (stdout, stderr)

    def drain_and_publish():
        """Pull pending output from both queues and publish it if it changed."""
        nonlocal stdout, stderr, last_published
        stdout = update_from_output_queue(out_queue, stdout)
        stderr = update_from_output_queue(err_queue, stderr)
        current = (stdout, stderr)
        if job is not None and current != last_published:
            job.update_realtime(stdout=stdout, stderr=stderr)
            last_published = current

    # Keep polling until the process has exited and both monitors are done.
    while process.poll() is None or out_monitor.is_alive() or err_monitor.is_alive():
        drain_and_publish()

    # Pick up anything still pending after the loop ends.
    try:
        process.stdout.flush()
    except ValueError:  # stdout is already closed
        pass
    drain_and_publish()

    return (stdout, stderr, process.returncode)
def setup_venv(virtual_environment, job=None, stdout="", stderr=""):
    """Create the virtual environment on disk if its install path is missing.

    When the install path does not exist, this creates the environment with
    the configured ``python_binary``, installs pip into it and, if the
    environment declares requirements, installs them with ``pip install -r``.
    When the install path already exists no command is run.

    Output of every command is appended to ``stdout``/``stderr`` and streamed
    to ``job`` through ``run_and_stream_command``.

    Returns ``(venv_executable, stdout, stderr, return_code)``.
    Raises ``Exception`` if any setup command exits with a non-zero code.
    """
    venv_path = virtual_environment.get_install_path()
    venv_executable = virtual_environment.get_venv_python_binary()
    return_code = 0

    if not os.path.exists(venv_path):
        venv_command = [
            virtual_environment.python_binary,
            "-m",
            "venv",
            venv_path,
            "--without-pip",
            "--system-site-packages",
        ]
        (stdout, stderr, return_code) = run_and_stream_command(
            venv_command, cwd=None, job=job, stdout=stdout, stderr=stderr
        )
        if return_code:
            raise Exception("VirtualEnv setup failed.\n{}\n{}".format(stdout, stderr))

        # The environment is created with --without-pip, so pip is installed
        # into it explicitly.
        pip_setup = [venv_executable, "-m", "pip", "install", "-I", "pip"]
        (stdout, stderr, return_code) = run_and_stream_command(
            pip_setup, cwd=None, job=job, stdout=stdout, stderr=stderr
        )
        if return_code:
            raise Exception("Pip setup failed.\n{}\n{}".format(stdout, stderr))

        requirements = virtual_environment.requirements
        if requirements:
            # delete=False keeps the file on disk after the handle is closed
            # so pip can read it by name; it is removed manually below.
            with tempfile.NamedTemporaryFile(
                mode="w", prefix="requirements", suffix=".txt", delete=False
            ) as reqs_txt:
                reqs_txt.write(requirements)
            try:
                venv_command = [
                    venv_executable,
                    "-m",
                    "pip",
                    "install",
                    "-r",
                    reqs_txt.name,
                ]
                (stdout, stderr, return_code) = run_and_stream_command(
                    venv_command, cwd=None, job=job, stdout=stdout, stderr=stderr
                )
            finally:
                # Remove the temporary file even when the install raises or
                # fails, so failed runs do not leave requirements files behind.
                os.remove(reqs_txt.name)
            if return_code:
                raise Exception(
                    "Requirements setup failed.\n{}\n{}".format(stdout, stderr)
                )

    return (venv_executable, stdout, stderr, return_code)
@celery_app.task()
def submit_script(**kwargs):
job_id = kwargs.pop("wooey_job")
@ -100,10 +193,23 @@ def submit_script(**kwargs):
from .models import WooeyJob
job = WooeyJob.objects.get(pk=job_id)
job.update_realtime(delete=True)
stdout, stderr = "", ""
try:
command = utils.get_job_commands(job=job)
virtual_environment = job.script_version.script.virtual_environment
if virtual_environment:
(venv_executable, stdout, stderr, return_code) = setup_venv(
virtual_environment, job, stdout, stderr
)
if return_code:
raise Exception(
"Virtual env setup failed.\n{}\n{}".format(stdout, stderr)
)
else:
venv_executable = None
command = utils.get_job_commands(job=job, executable=venv_executable)
if resubmit:
# clone ourselves, setting pk=None seems hackish but it works
job.pk = None
@ -124,47 +230,10 @@ def submit_script(**kwargs):
job.status = WooeyJob.RUNNING
job.save()
proc = subprocess.Popen(
command,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
cwd=abscwd,
bufsize=0,
stdout, stderr, return_code = run_and_stream_command(
command, abscwd, job, stdout, stderr
)
# We need to use subprocesses to capture the IO, otherwise they will block one another
# i.e. a check against stderr will sit waiting on stderr before returning
# we use Queues to communicate
qout, qerr = Queue(), Queue()
pout = output_monitor_queue(qout, proc.stdout)
perr = output_monitor_queue(qerr, proc.stderr)
prev_std = None
def check_output(job, stdout, stderr, prev_std):
# Check for updates from either (non-blocking)
stdout = update_from_output_queue(qout, stdout)
stderr = update_from_output_queue(qerr, stderr)
# If there are changes, update the db
if (stdout, stderr) != prev_std:
job.update_realtime(stdout=stdout, stderr=stderr)
prev_std = (stdout, stderr)
return stdout, stderr, prev_std
# Loop until the process is complete + both stdout/stderr have EOFd
while proc.poll() is None or pout.is_alive() or perr.is_alive():
stdout, stderr, prev_std = check_output(job, stdout, stderr, prev_std)
# Catch any remaining output
try:
proc.stdout.flush()
except ValueError: # Handle if stdout is closed
pass
stdout, stderr, prev_std = check_output(job, stdout, stderr, prev_std)
return_code = proc.returncode
# fetch the job again in case the database connection was lost during the job or something else changed.
job = WooeyJob.objects.get(pk=job_id)
# if there are files generated, make zip/tar files for download
@ -200,11 +269,11 @@ def submit_script(**kwargs):
try:
zip.write(path, arcname=archive_name)
except Exception:
stderr = "{}\n{}".format(stderr, traceback.format_exc())
stderr += "{}\n{}".format(stderr, traceback.format_exc())
try:
zip.close()
except Exception:
stderr = "{}\n{}".format(stderr, traceback.format_exc())
stderr += "{}\n{}".format(stderr, traceback.format_exc())
# save all the files generated as well to our default storage for ephemeral storage setups
if wooey_settings.WOOEY_EPHEMERAL_FILES:
@ -223,9 +292,8 @@ def submit_script(**kwargs):
job.status = WooeyJob.COMPLETED if return_code == 0 else WooeyJob.FAILED
job.update_realtime(delete=True)
except Exception:
stderr = "{}\n{}".format(stderr, traceback.format_exc())
stderr += "{}\n{}".format(stderr, traceback.format_exc())
job.status = WooeyJob.ERROR
job.stdout = stdout
job.stderr = stderr
job.save()

View File

@ -1,7 +1,18 @@
import sys
import tempfile
import factory
from django.contrib.auth import get_user_model
from ..models import APIKey, Script, ScriptGroup, WooeyJob, WooeyProfile, WooeyWidget
from ..models import (
APIKey,
Script,
ScriptGroup,
VirtualEnvironment,
WooeyJob,
WooeyProfile,
WooeyWidget,
)
from . import utils as test_utils
@ -85,12 +96,24 @@ class WooeyWidgetFactory(factory.DjangoModelFactory):
name = "test widget"
def generate_script(script_path, script_name=None):
# Test factory that builds VirtualEnvironment model instances.
class VirtualEnvFactory(factory.DjangoModelFactory):
class Meta:
model = VirtualEnvironment
# Names are generated from a sequence counter using the "venv_%d" pattern.
name = factory.Sequence(lambda n: "venv_%d" % n)
# Use the interpreter running the tests and the system temporary directory.
python_binary = sys.executable
venv_directory = tempfile.gettempdir()
def generate_script(script_path, script_name=None, ignore_bad_imports=False):
new_file = test_utils.save_script_path(script_path)
from ..backend import utils
res = utils.add_wooey_script(
script_name=script_name, script_path=new_file, group=None
script_name=script_name,
script_path=new_file,
group=None,
ignore_bad_imports=ignore_bad_imports,
)
return res["script"]

View File

@ -0,0 +1,17 @@
import argparse
import sys
import pandas as pd
# Module-level argument parser; it declares no arguments of its own.
parser = argparse.ArgumentParser(description="Something")


def main():
    """Build an empty pandas DataFrame and print it."""
    empty_frame = pd.DataFrame()
    print(empty_frame)


if __name__ == "__main__":
    args = parser.parse_args()
    # Echo the parsed arguments before running the pandas check.
    sys.stdout.write("{}".format(args))
    exit_status = main()
    sys.exit(exit_status)

View File

@ -0,0 +1,90 @@
import os
import shutil
import subprocess
from unittest import mock
from django.test import TransactionTestCase
from wooey import settings as wooey_settings
from wooey.backend.utils import create_wooey_job
from wooey.models import WooeyJob
from wooey.tasks import setup_venv
from . import config
from .factories import VirtualEnvFactory, generate_script
class TestVirtualEnvironments(TransactionTestCase):
    """Tests for creating virtual environments and running jobs inside them."""

    def setUp(self):
        """Create a VirtualEnvironment record and clear any install left on disk."""
        super().setUp()
        self.venv = VirtualEnvFactory()
        install_path = self.venv.get_install_path()
        if os.path.exists(install_path):
            shutil.rmtree(install_path)

    def test_sets_up_virtual_env(self):
        """setup_venv creates the environment's Python executable."""
        venv = self.venv
        (venv_executable, stdout, stderr, return_code) = setup_venv(venv)
        self.assertTrue(os.path.exists(venv_executable))

    def test_reuses_virtual_env(self):
        """A second setup_venv call runs no commands once the environment exists."""
        venv = self.venv
        (venv_executable, stdout, stderr, return_code) = setup_venv(venv)
        self.assertTrue(os.path.exists(venv_executable))
        with mock.patch("wooey.tasks.run_and_stream_command") as command_runner:
            command_runner.return_value = ("stdout", "stderr", 0)
            setup_venv(venv)
            self.assertFalse(command_runner.called)

    def test_installs_pip(self):
        """setup_venv installs a pip executable into the environment."""
        venv = self.venv
        setup_venv(venv)
        if wooey_settings.IS_WINDOWS:
            self.assertTrue(
                os.path.exists(
                    os.path.join(venv.get_install_path(), "Scripts", "pip.exe")
                )
            )
        else:
            self.assertTrue(
                os.path.exists(os.path.join(venv.get_install_path(), "bin", "pip"))
            )

    def test_installs_requirements(self):
        """Packages listed in the requirements are installed into the environment."""
        venv = self.venv
        venv.requirements = "flask"
        venv.save()
        setup_venv(venv)
        binary = venv.get_venv_python_binary()
        # "-m" and "pip" are separate arguments; without the comma the two
        # literals would be concatenated into a single "-mpip" argument.
        results = subprocess.run(
            [binary, "-m", "pip", "freeze", "--local"], capture_output=True
        )
        packages = results.stdout.decode().lower()
        self.assertIn("flask", packages)

    def test_job_can_run_in_venv(self):
        """A job whose script imports pandas completes when pandas is in its venv."""
        # For this, we install a package that is only in the venv (pandas) and make sure it runs
        pandas_script_path = os.path.join(
            config.WOOEY_TEST_SCRIPTS, "venv_pandas_test.py"
        )
        pandas_script_version = generate_script(
            pandas_script_path,
            script_name="pandas-test",
            ignore_bad_imports=True,
        )
        pandas_script = pandas_script_version.script
        venv = self.venv
        venv.requirements = "pandas"
        venv.save()
        pandas_script.virtual_environment = venv
        pandas_script.save()
        job = create_wooey_job(
            script_version_pk=pandas_script_version.pk,
            data={
                "job_name": "abc",
            },
        )
        self.assertEqual(job.status, WooeyJob.SUBMITTED)
        job = job.submit_to_celery()
        job.refresh_from_db()
        self.assertEqual(job.status, WooeyJob.COMPLETED)