feat: Add ignore case and UTF16 search options to sequence searching

This commit is contained in:
WerWolv 2023-12-19 14:34:35 +01:00
parent c7ab4a4569
commit 96db2074c6
6 changed files with 150 additions and 9 deletions

View File

@ -6,4 +6,11 @@ skip -rfu ^__gnu_debug::
skip -rfu ^ImGui::
# Trigger breakpoint when execution reaches triggerSafeShutdown()
break triggerSafeShutdown
break triggerSafeShutdown
# Print backtrace after execution jumped to an invalid address
# Usage: type `fixbt` at the GDB prompt once the program has stopped at the bad address.
# The command loads the pointer-sized value at the top of the stack into $pc,
# advances $rsp by 8 bytes and then runs `bt`.
# NOTE(review): uses $rsp and an 8-byte stack slot, so this presumably only works on x86-64
# and only while the caller's return address is still on top of the stack - confirm.
define fixbt
set $pc = *(void **)$rsp
set $rsp = $rsp + 8
bt
end

View File

@ -78,6 +78,8 @@ namespace hex {
[[nodiscard]] std::string encodeByteString(const std::vector<u8> &bytes);
[[nodiscard]] std::vector<u8> decodeByteString(const std::string &string);
/**
 * @brief Converts a UTF-8 encoded string into UTF-16 code units
 * @param utf8 UTF-8 encoded input
 * @return One UTF-16 code unit per element (code points above U+FFFF become a surrogate pair),
 *         or an empty string if the input is empty or not valid UTF-8
 */
[[nodiscard]] std::wstring utf8ToUtf16(const std::string& utf8);
[[nodiscard]] constexpr u64 extract(u8 from, u8 to, const std::unsigned_integral auto &value) {
if (from < to) std::swap(from, to);

View File

@ -485,6 +485,70 @@ namespace hex {
return result;
}
/**
 * @brief Converts a UTF-8 encoded string into UTF-16 code units
 *
 * Every element of the returned string holds exactly one UTF-16 code unit, also on
 * platforms where wchar_t is wider than 16 bits. Code points above U+FFFF are emitted
 * as a high/low surrogate pair. Callers that need raw UTF-16 bytes must therefore
 * narrow each element to 16 bits instead of copying the wchar_t buffer verbatim.
 *
 * @param utf8 UTF-8 encoded input
 * @return The UTF-16 code units, or an empty string if the input is empty or ill-formed
 *         (stray or missing continuation bytes, invalid lead bytes, overlong encodings,
 *         encoded surrogates or code points above U+10FFFF)
 */
std::wstring utf8ToUtf16(const std::string& utf8) {
    // Smallest code point that genuinely needs N continuation bytes (index = N).
    // A decoded value below this bound means the sequence is an overlong encoding.
    constexpr char32_t MinimumCodePoint[] = { 0x0000, 0x0080, 0x0800, 0x10000 };

    std::wstring utf16;
    // UTF-16 never needs more code units than UTF-8 needs bytes
    utf16.reserve(utf8.size());

    for (size_t byteIndex = 0; byteIndex < utf8.size();) {
        const auto leadByte = static_cast<unsigned char>(utf8[byteIndex]);
        byteIndex += 1;

        char32_t codePoint = 0;
        size_t continuationCount = 0;

        if (leadByte <= 0x7F) {
            codePoint = leadByte;
        } else if (leadByte <= 0xBF) {
            // Continuation byte without a preceding lead byte
            return { };
        } else if (leadByte <= 0xDF) {
            codePoint = leadByte & 0x1F;
            continuationCount = 1;
        } else if (leadByte <= 0xEF) {
            codePoint = leadByte & 0x0F;
            continuationCount = 2;
        } else if (leadByte <= 0xF7) {
            codePoint = leadByte & 0x07;
            continuationCount = 3;
        } else {
            // 0xF8..0xFF never appear in UTF-8
            return { };
        }

        // Sequence is cut off by the end of the input
        if (utf8.size() - byteIndex < continuationCount)
            return { };

        for (size_t i = 0; i < continuationCount; i += 1) {
            const auto continuationByte = static_cast<unsigned char>(utf8[byteIndex]);
            if (continuationByte < 0x80 || continuationByte > 0xBF)
                return { };

            codePoint = (codePoint << 6) | (continuationByte & 0x3F);
            byteIndex += 1;
        }

        // Overlong encodings are ill-formed and must not be decoded
        if (codePoint < MinimumCodePoint[continuationCount])
            return { };

        // Surrogates are reserved for UTF-16 and may not be encoded in UTF-8
        if (codePoint >= 0xD800 && codePoint <= 0xDFFF)
            return { };

        if (codePoint > 0x10FFFF)
            return { };

        if (codePoint <= 0xFFFF) {
            utf16 += static_cast<wchar_t>(codePoint);
        } else {
            const char32_t offset = codePoint - 0x10000;
            utf16 += static_cast<wchar_t>((offset >> 10) + 0xD800);
            utf16 += static_cast<wchar_t>((offset & 0x3FF) + 0xDC00);
        }
    }

    return utf16;
}
float float16ToFloat32(u16 float16) {
u32 sign = float16 >> 15;
u32 exponent = (float16 >> 10) & 0x1F;

View File

@ -63,6 +63,9 @@ namespace hex::plugin::builtin {
// Settings for the sequence search mode, stored in the `bytes` member
struct Sequence {
// Text entered by the user; it is run through hex::decodeByteString() before searching
std::string sequence;
// Encoding used to turn the decoded input into the byte pattern that is searched for
// (ASCII uses the bytes as-is, the UTF-16 variants convert them first)
StringType type = StringType::ASCII;
// When true, bytes are compared after lower-casing upper-case characters on both sides
bool ignoreCase = false;
} bytes;
struct Regex {

View File

@ -895,6 +895,7 @@
"hex.builtin.view.find.search.reset": "Reset",
"hex.builtin.view.find.searching": "Searching...",
"hex.builtin.view.find.sequences": "Sequences",
"hex.builtin.view.find.sequences.ignore_case": "Ignore case",
"hex.builtin.view.find.shortcut.select_all": "Select All Occurrences",
"hex.builtin.view.find.strings": "Strings",
"hex.builtin.view.find.strings.chars": "Characters",

View File

@ -6,6 +6,7 @@
#include <hex/providers/buffered_reader.hpp>
#include <array>
#include <ranges>
#include <regex>
#include <string>
#include <utility>
@ -23,7 +24,7 @@ namespace hex::plugin::builtin {
if (m_searchTask.isRunning())
return { };
if (!m_occurrenceTree->overlapping({ address, address + size }).empty())
if (!m_occurrenceTree->overlapping({ address, address }).empty())
return HighlightColor();
else
return std::nullopt;
@ -258,23 +259,74 @@ namespace hex::plugin::builtin {
reader.seek(searchRegion.getStartAddress());
reader.setEndAddress(searchRegion.getEndAddress());
auto bytes = hex::decodeByteString(settings.sequence);
if (bytes.empty())
auto input = hex::decodeByteString(settings.sequence);
if (input.empty())
return { };
std::vector<u8> bytes;
Occurrence::DecodeType decodeType = Occurrence::DecodeType::Binary;
std::endian endian;
switch (settings.type) {
default:
case SearchSettings::StringType::ASCII:
bytes = input;
decodeType = Occurrence::DecodeType::ASCII;
endian = std::endian::native;
break;
case SearchSettings::StringType::UTF16LE: {
auto wString = hex::utf8ToUtf16({ input.begin(), input.end() });
bytes.resize(wString.size() * 2);
std::memcpy(bytes.data(), wString.data(), bytes.size());
decodeType = Occurrence::DecodeType::UTF16;
endian = std::endian::little;
break;
}
case SearchSettings::StringType::UTF16BE: {
auto wString = hex::utf8ToUtf16({ input.begin(), input.end() });
bytes.resize(wString.size() * 2);
std::memcpy(bytes.data(), wString.data(), bytes.size());
decodeType = Occurrence::DecodeType::UTF16;
endian = std::endian::big;
for (size_t i = 0; i < bytes.size(); i += 2)
std::swap(bytes[i], bytes[i + 1]);
break;
}
}
auto occurrence = reader.begin();
u64 progress = 0;
auto searchPredicate = [&] -> bool(*)(u8, u8) {
if (!settings.ignoreCase)
return [](u8 left, u8 right) -> bool {
return left == right;
};
else
return [](u8 left, u8 right) -> bool {
if (std::isupper(left))
left = std::tolower(left);
if (std::isupper(right))
right = std::tolower(right);
return left == right;
};
}();
while (true) {
task.update(progress);
occurrence = std::search(reader.begin(), reader.end(), std::boyer_moore_horspool_searcher(bytes.begin(), bytes.end()));
occurrence = std::search(reader.begin(), reader.end(), std::default_searcher(bytes.begin(), bytes.end(), searchPredicate));
if (occurrence == reader.end())
break;
auto address = occurrence.getAddress();
reader.seek(address + 1);
results.push_back(Occurrence{ Region { address, bytes.size() }, Occurrence::DecodeType::Binary, std::endian::native, false });
results.push_back(Occurrence{ Region { address, bytes.size() }, decodeType, endian, false });
progress = address - searchRegion.getStartAddress();
}
@ -497,6 +549,8 @@ namespace hex::plugin::builtin {
case Value:
case Strings:
case Sequence:
case Regex:
{
switch (occurrence.decodeType) {
using enum Occurrence::DecodeType;
@ -523,8 +577,6 @@ namespace hex::plugin::builtin {
}
}
break;
case Sequence:
case Regex:
case BinaryPattern:
result = hex::encodeByteString(bytes);
break;
@ -661,6 +713,18 @@ namespace hex::plugin::builtin {
ImGuiExt::InputTextIcon("hex.builtin.common.value"_lang, ICON_VS_SYMBOL_KEY, settings.sequence);
if (ImGui::BeginCombo("hex.builtin.common.type"_lang, StringTypes[std::to_underlying(settings.type)].c_str())) {
for (size_t i = 0; i < StringTypes.size() - 2; i++) {
auto type = static_cast<SearchSettings::StringType>(i);
if (ImGui::Selectable(StringTypes[i].c_str(), type == settings.type))
settings.type = type;
}
ImGui::EndCombo();
}
ImGui::Checkbox("hex.builtin.view.find.sequences.ignore_case"_lang, &settings.ignoreCase);
m_settingsValid = !settings.sequence.empty() && !hex::decodeByteString(settings.sequence).empty();
ImGui::EndTabItem();