From 6208c14ef0b1cd92b672099cfee6169c37c5a5c8 Mon Sep 17 00:00:00 2001 From: baliasnyifeliks Date: Tue, 3 Feb 2026 20:01:22 +0200 Subject: [PATCH] refactor(makhno): improve file array extraction logic Replace regex-based file array extraction with a bracket matching algorithm that handles nested arrays. The new implementation uses manual parsing to track bracket depth: it starts at the first '[' after the first case-insensitive occurrence of "file" in the HTML content and returns the JSON array up to the matching ']'. Note that brackets inside quoted string values are counted too, so an unbalanced bracket in a string can still throw off the match. --- Makhno/MakhnoInvoke.cs | 47 ++++++++++++++++++++++++++++++++---------- 1 file changed, 36 insertions(+), 11 deletions(-) diff --git a/Makhno/MakhnoInvoke.cs b/Makhno/MakhnoInvoke.cs index 260024c..825df49 100644 --- a/Makhno/MakhnoInvoke.cs +++ b/Makhno/MakhnoInvoke.cs @@ -366,20 +366,45 @@ namespace Makhno if (string.IsNullOrEmpty(html)) return null; - var matches = new[] - { - Regex.Match(html, @"file\s*:\s*'(\[.*\])'", RegexOptions.Singleline), - Regex.Match(html, @"file\s*:\s*""(\[.*\])""", RegexOptions.Singleline), - Regex.Match(html, @"file\s*:\s*(\[[\s\S]*?\])", RegexOptions.Singleline) - }; + var startIndex = FindFileArrayStart(html); + if (startIndex < 0) + return null; - foreach (var match in matches) + string jsonArray = ExtractBracketArray(html, startIndex); + if (string.IsNullOrEmpty(jsonArray)) + return null; + + return jsonArray + .Replace("\\'", "'") + .Replace("\\\"", "\""); + } + + private int FindFileArrayStart(string html) + { + int fileIndex = html.IndexOf("file", StringComparison.OrdinalIgnoreCase); + if (fileIndex < 0) + return -1; + + int bracketIndex = html.IndexOf('[', fileIndex); + return bracketIndex; + } + + private string ExtractBracketArray(string text, int startIndex) + { + if (startIndex < 0 || startIndex >= text.Length || text[startIndex] != '[') + return null; + + int depth = 0; + for (int i = startIndex; i < text.Length; i++) { - if (match.Success) + char ch = text[i]; + if (ch == '[') + depth++; + else if (ch == ']') { - return match.Groups[1].Value - .Replace("\\'", "'") - 
.Replace("\\\"", "\""); + depth--; + if (depth == 0) + return text.Substring(startIndex, i - startIndex + 1); } }