agent recovery improvements

This commit is contained in:
wh1te909 2020-11-25 23:48:14 +00:00
parent f9657599c2
commit 6e048b2a12
5 changed files with 74 additions and 19 deletions

View File

@ -0,0 +1,18 @@
# Generated by Django 3.1.3 on 2020-11-25 23:34
from django.db import migrations, models
# (stored value, human-readable label) pairs accepted by RecoveryAction.mode.
_RECOVERY_MODE_CHOICES = [
    ("salt", "Salt"),
    ("mesh", "Mesh"),
    ("command", "Command"),
    ("rpc", "Nats RPC"),
    ("checkrunner", "Checkrunner"),
]


class Migration(migrations.Migration):
    """Redefine the ``mode`` field of the ``recoveryaction`` model.

    The field stays a 50-character ``CharField`` defaulting to ``"mesh"``;
    its ``choices`` are set to the salt, mesh, command, rpc and checkrunner
    recovery modes.
    """

    # Must be applied after the previous migration of the ``agents`` app.
    dependencies = [
        ("agents", "0025_auto_20201122_0407"),
    ]

    operations = [
        migrations.AlterField(
            model_name="recoveryaction",
            name="mode",
            field=models.CharField(
                choices=_RECOVERY_MODE_CHOICES,
                default="mesh",
                max_length=50,
            ),
        ),
    ]

View File

@ -753,6 +753,7 @@ RECOVERY_CHOICES = [
("mesh", "Mesh"),
("command", "Command"),
("rpc", "Nats RPC"),
("checkrunner", "Checkrunner"),
]

View File

@ -640,14 +640,17 @@ class TestAgentViews(TacticalTestCase):
self.check_not_authenticated("post", url)
@patch("agents.models.Agent.nats_cmd")
def test_recover_mesh(self, mock_ret):
def test_recover_mesh(self, nats_cmd):
url = f"/agents/{self.agent.pk}/recovermesh/"
mock_ret.return_value = True
nats_cmd.return_value = "ok"
r = self.client.get(url)
self.assertEqual(r.status_code, 200)
self.assertIn(self.agent.hostname, r.data)
nats_cmd.assert_called_with(
{"func": "recover", "payload": {"mode": "mesh"}}, timeout=45
)
mock_ret.return_value = "timeout"
nats_cmd.return_value = "timeout"
r = self.client.get(url)
self.assertEqual(r.status_code, 400)

View File

@ -636,25 +636,51 @@ def install_agent(request):
@api_view(["POST"])
def recover(request):
    """Recover a service on an agent, in realtime when possible.

    Expects ``request.data`` to carry ``pk`` (agent primary key), ``mode``
    (the recovery mode) and, for ``mode == "command"``, a non-empty ``cmd``.

    Flow:
      1. Reject agents whose version is 0.9.5 or older.
      2. Reject ``tacagent`` / ``checkrunner`` when the agent has no NATS
         support, since those modes have no fallback.
      3. With NATS support, try a realtime recovery for the ``tacagent``,
         ``checkrunner``, ``salt`` and ``mesh`` modes and return on ``"ok"``.
      4. Otherwise queue a ``RecoveryAction`` for the agent, unless one with
         ``last_run=None`` already exists.

    Returns a ``Response`` with a success message, or whatever
    ``notify_error`` produces for the failure cases (presumably an HTTP
    error response -- confirm against its definition).
    """
    agent = get_object_or_404(Agent, pk=request.data["pk"])
    mode = request.data["mode"]
    # ``cmd`` is only meaningful for the "command" mode; use .get() so an
    # omitted key yields the "Command is required" error below instead of
    # an unhandled KeyError.
    cmd = request.data.get("cmd")

    if pyver.parse(agent.version) <= pyver.parse("0.9.5"):
        return notify_error("Only available in agent version greater than 0.9.5")

    if not agent.has_nats and mode in ("tacagent", "checkrunner"):
        return notify_error("Requires agent version 1.1.0 or greater")

    # attempt a realtime recovery if supported, otherwise fall back to old recovery method
    if agent.has_nats and mode in ("tacagent", "checkrunner", "salt", "mesh"):
        data = {"func": "recover", "payload": {"mode": mode}}
        r = asyncio.run(agent.nats_cmd(data, timeout=10))
        if r == "ok":
            return Response("Successfully completed recovery")

    # Only one queued (not yet run) recovery action is allowed per agent.
    if agent.recoveryactions.filter(last_run=None).exists():
        return notify_error(
            "A recovery action is currently pending. Please wait for the next agent check-in."
        )

    if mode == "command" and not cmd:
        return notify_error("Command is required")

    # if we've made it this far and realtime recovery didn't work,
    # tacagent service is the fallback recovery so we obv can't use that to recover itself if it's down
    if mode == "tacagent":
        return notify_error(
            "Requires RPC service to be functional. Please recover that first"
        )

    # we should only get here if all other methods fail
    RecoveryAction(
        agent=agent,
        mode=mode,
        command=cmd if mode == "command" else None,
    ).save()

    return Response("Recovery will be attempted on the agent's next check-in")
@api_view(["POST"])
@ -695,8 +721,10 @@ def recover_mesh(request, pk):
agent = get_object_or_404(Agent, pk=pk)
if not agent.has_nats:
return notify_error("Requires agent version 1.1.0 or greater")
r = asyncio.run(agent.nats_cmd({"func": "recovermesh"}, timeout=45))
if r == "timeout":
data = {"func": "recover", "payload": {"mode": "mesh"}}
r = asyncio.run(agent.nats_cmd(data, timeout=45))
if r != "ok":
return notify_error("Unable to contact the agent")
return Response(f"Repaired mesh agent on {agent.hostname}")

View File

@ -11,29 +11,31 @@
<div class="q-gutter-sm">
<q-radio dense v-model="mode" val="mesh" label="Mesh Agent" />
<q-radio dense v-model="mode" val="rpc" label="Tactical RPC" />
<q-radio dense v-model="mode" val="tacagent" label="Tactical Agent" />
<q-radio dense v-model="mode" val="checkrunner" label="Tactical Checkrunner" />
<q-radio dense v-model="mode" val="salt" label="Salt Minion" />
<q-radio dense v-model="mode" val="command" label="Shell Command" />
</div>
</q-card-section>
<q-card-section v-show="mode === 'mesh'">
<p>Fix issues with the Mesh Agent which handles take control, terminal and file browser.</p>
<p>Fix issues with the Mesh Agent which handles take control, live terminal and file browser.</p>
</q-card-section>
<q-card-section v-show="mode === 'tacagent'">
<p>Fix issues with the TacticalAgent windows service which handles agent check-in and os info.</p>
</q-card-section>
<q-card-section v-show="mode === 'checkrunner'">
<p>Fix issues with the Tactical Checkrunner windows service which handles running all checks.</p>
</q-card-section>
<q-card-section v-show="mode === 'salt'">
<p>
Fix issues with the salt-minion (do this if getting alot of errors about not being able to contact the agent
even if it's online).
</p>
<p>Fix issues with the salt-minion which handles windows updates, chocolatey and scheduled tasks.</p>
</q-card-section>
<q-card-section v-show="mode === 'rpc'">
<p>
Fix issues with the Tactical RPC service (do this if getting alot of errors about not being able to contact
the agent even if it's online).
</p>
<p>Fix issues with the Tactical RPC service which handles most of the agent's realtime functions.</p>
</q-card-section>
<q-card-section v-show="mode === 'command'">
<p>Run a shell command on the agent.</p>
<p>You should use the 'Send Command' feature from the agent's context menu for sending shell commands.</p>
<p>Only use this as a last resort if unable to recover the salt-minion.</p>
<p>Only use this as a last resort if unable to recover the Tactical RPC service.</p>
<q-input
ref="input"
v-model="cmd"
@ -82,6 +84,7 @@ export default {
},
methods: {
recover() {
this.$q.loading.show();
const data = {
pk: this.pk,
cmd: this.cmd,
@ -90,10 +93,12 @@ export default {
this.$axios
.post("/agents/recover/", data)
.then(r => {
this.$q.loading.hide();
this.$emit("close");
this.notifySuccess(r.data, 5000);
})
.catch(e => {
this.$q.loading.hide();
this.notifyError(e.response.data, 5000);
});
},