agent recovery improvements
This commit is contained in:
parent
f9657599c2
commit
6e048b2a12
|
@ -0,0 +1,18 @@
|
|||
# Generated by Django 3.1.3 on 2020-11-25 23:34
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('agents', '0025_auto_20201122_0407'),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterField(
|
||||
model_name='recoveryaction',
|
||||
name='mode',
|
||||
field=models.CharField(choices=[('salt', 'Salt'), ('mesh', 'Mesh'), ('command', 'Command'), ('rpc', 'Nats RPC'), ('checkrunner', 'Checkrunner')], default='mesh', max_length=50),
|
||||
),
|
||||
]
|
|
@ -753,6 +753,7 @@ RECOVERY_CHOICES = [
|
|||
("mesh", "Mesh"),
|
||||
("command", "Command"),
|
||||
("rpc", "Nats RPC"),
|
||||
("checkrunner", "Checkrunner"),
|
||||
]
|
||||
|
||||
|
||||
|
|
|
@ -640,14 +640,17 @@ class TestAgentViews(TacticalTestCase):
|
|||
self.check_not_authenticated("post", url)
|
||||
|
||||
@patch("agents.models.Agent.nats_cmd")
|
||||
def test_recover_mesh(self, mock_ret):
|
||||
def test_recover_mesh(self, nats_cmd):
|
||||
url = f"/agents/{self.agent.pk}/recovermesh/"
|
||||
mock_ret.return_value = True
|
||||
nats_cmd.return_value = "ok"
|
||||
r = self.client.get(url)
|
||||
self.assertEqual(r.status_code, 200)
|
||||
self.assertIn(self.agent.hostname, r.data)
|
||||
nats_cmd.assert_called_with(
|
||||
{"func": "recover", "payload": {"mode": "mesh"}}, timeout=45
|
||||
)
|
||||
|
||||
mock_ret.return_value = "timeout"
|
||||
nats_cmd.return_value = "timeout"
|
||||
r = self.client.get(url)
|
||||
self.assertEqual(r.status_code, 400)
|
||||
|
||||
|
|
|
@ -636,25 +636,51 @@ def install_agent(request):
|
|||
@api_view(["POST"])
|
||||
def recover(request):
|
||||
agent = get_object_or_404(Agent, pk=request.data["pk"])
|
||||
mode = request.data["mode"]
|
||||
|
||||
if pyver.parse(agent.version) <= pyver.parse("0.9.5"):
|
||||
return notify_error("Only available in agent version greater than 0.9.5")
|
||||
|
||||
if not agent.has_nats:
|
||||
if mode == "tacagent" or mode == "checkrunner":
|
||||
return notify_error("Requires agent version 1.1.0 or greater")
|
||||
|
||||
# attempt a realtime recovery if supported, otherwise fall back to old recovery method
|
||||
if agent.has_nats:
|
||||
if (
|
||||
mode == "tacagent"
|
||||
or mode == "checkrunner"
|
||||
or mode == "salt"
|
||||
or mode == "mesh"
|
||||
):
|
||||
data = {"func": "recover", "payload": {"mode": mode}}
|
||||
r = asyncio.run(agent.nats_cmd(data, timeout=10))
|
||||
if r == "ok":
|
||||
return Response("Successfully completed recovery")
|
||||
|
||||
if agent.recoveryactions.filter(last_run=None).exists():
|
||||
return notify_error(
|
||||
"A recovery action is currently pending. Please wait for the next agent check-in."
|
||||
)
|
||||
|
||||
if request.data["mode"] == "command" and not request.data["cmd"]:
|
||||
if mode == "command" and not request.data["cmd"]:
|
||||
return notify_error("Command is required")
|
||||
|
||||
# if we've made it this far and realtime recovery didn't work,
|
||||
# the tacagent service is the fallback recovery mechanism, so we obviously can't use it to recover itself if it's down
|
||||
if mode == "tacagent":
|
||||
return notify_error(
|
||||
"Requires RPC service to be functional. Please recover that first"
|
||||
)
|
||||
|
||||
# we should only get here if all other methods fail
|
||||
RecoveryAction(
|
||||
agent=agent,
|
||||
mode=request.data["mode"],
|
||||
command=request.data["cmd"] if request.data["mode"] == "command" else None,
|
||||
mode=mode,
|
||||
command=request.data["cmd"] if mode == "command" else None,
|
||||
).save()
|
||||
|
||||
return Response(f"Recovery will be attempted on the agent's next check-in")
|
||||
return Response("Recovery will be attempted on the agent's next check-in")
|
||||
|
||||
|
||||
@api_view(["POST"])
|
||||
|
@ -695,8 +721,10 @@ def recover_mesh(request, pk):
|
|||
agent = get_object_or_404(Agent, pk=pk)
|
||||
if not agent.has_nats:
|
||||
return notify_error("Requires agent version 1.1.0 or greater")
|
||||
r = asyncio.run(agent.nats_cmd({"func": "recovermesh"}, timeout=45))
|
||||
if r == "timeout":
|
||||
|
||||
data = {"func": "recover", "payload": {"mode": "mesh"}}
|
||||
r = asyncio.run(agent.nats_cmd(data, timeout=45))
|
||||
if r != "ok":
|
||||
return notify_error("Unable to contact the agent")
|
||||
|
||||
return Response(f"Repaired mesh agent on {agent.hostname}")
|
||||
|
|
|
@ -11,29 +11,31 @@
|
|||
<div class="q-gutter-sm">
|
||||
<q-radio dense v-model="mode" val="mesh" label="Mesh Agent" />
|
||||
<q-radio dense v-model="mode" val="rpc" label="Tactical RPC" />
|
||||
<q-radio dense v-model="mode" val="tacagent" label="Tactical Agent" />
|
||||
<q-radio dense v-model="mode" val="checkrunner" label="Tactical Checkrunner" />
|
||||
<q-radio dense v-model="mode" val="salt" label="Salt Minion" />
|
||||
<q-radio dense v-model="mode" val="command" label="Shell Command" />
|
||||
</div>
|
||||
</q-card-section>
|
||||
<q-card-section v-show="mode === 'mesh'">
|
||||
<p>Fix issues with the Mesh Agent which handles take control, terminal and file browser.</p>
|
||||
<p>Fix issues with the Mesh Agent which handles take control, live terminal and file browser.</p>
|
||||
</q-card-section>
|
||||
<q-card-section v-show="mode === 'tacagent'">
|
||||
<p>Fix issues with the TacticalAgent windows service which handles agent check-in and os info.</p>
|
||||
</q-card-section>
|
||||
<q-card-section v-show="mode === 'checkrunner'">
|
||||
<p>Fix issues with the Tactical Checkrunner windows service which handles running all checks.</p>
|
||||
</q-card-section>
|
||||
<q-card-section v-show="mode === 'salt'">
|
||||
<p>
|
||||
Fix issues with the salt-minion (do this if getting alot of errors about not being able to contact the agent
|
||||
even if it's online).
|
||||
</p>
|
||||
<p>Fix issues with the salt-minion which handles windows updates, chocolatey and scheduled tasks.</p>
|
||||
</q-card-section>
|
||||
<q-card-section v-show="mode === 'rpc'">
|
||||
<p>
|
||||
Fix issues with the Tactical RPC service (do this if getting alot of errors about not being able to contact
|
||||
the agent even if it's online).
|
||||
</p>
|
||||
<p>Fix issues with the Tactical RPC service which handles most of the agent's realtime functions.</p>
|
||||
</q-card-section>
|
||||
<q-card-section v-show="mode === 'command'">
|
||||
<p>Run a shell command on the agent.</p>
|
||||
<p>You should use the 'Send Command' feature from the agent's context menu for sending shell commands.</p>
|
||||
<p>Only use this as a last resort if unable to recover the salt-minion.</p>
|
||||
<p>Only use this as a last resort if unable to recover the Tactical RPC service.</p>
|
||||
<q-input
|
||||
ref="input"
|
||||
v-model="cmd"
|
||||
|
@ -82,6 +84,7 @@ export default {
|
|||
},
|
||||
methods: {
|
||||
recover() {
|
||||
this.$q.loading.show();
|
||||
const data = {
|
||||
pk: this.pk,
|
||||
cmd: this.cmd,
|
||||
|
@ -90,10 +93,12 @@ export default {
|
|||
this.$axios
|
||||
.post("/agents/recover/", data)
|
||||
.then(r => {
|
||||
this.$q.loading.hide();
|
||||
this.$emit("close");
|
||||
this.notifySuccess(r.data, 5000);
|
||||
})
|
||||
.catch(e => {
|
||||
this.$q.loading.hide();
|
||||
this.notifyError(e.response.data, 5000);
|
||||
});
|
||||
},
|
||||
|
|
Loading…
Reference in New Issue