refactor(makhno): improve file array extraction logic

Replace regex-based file array extraction with a more robust bracket matching algorithm that properly handles nested structures. The new implementation uses manual parsing to track bracket depth, ensuring correct extraction of JSON arrays from HTML content.
2026-04-16 17:32:20 +00:00 · 2026-02-03 20:01:22 +02:00 · 2026-02-03 20:01:22 +02:00 · 6208c14ef0
commit 6208c14ef0
parent ed7bfa67de
1 changed files with 36 additions and 11 deletions
--- a/Makhno/MakhnoInvoke.cs
+++ b/Makhno/MakhnoInvoke.cs
@@ -366,20 +366,45 @@ namespace Makhno
            if (string.IsNullOrEmpty(html))
                return null;

-            var matches = new[]
-            {
-                Regex.Match(html, @"file\s*:\s*'(\[.*\])'", RegexOptions.Singleline),
-                Regex.Match(html, @"file\s*:\s*""(\[.*\])""", RegexOptions.Singleline),
-                Regex.Match(html, @"file\s*:\s*(\[[\s\S]*?\])", RegexOptions.Singleline)
-            };
+            var startIndex = FindFileArrayStart(html);
+            if (startIndex < 0)
+                return null;

-            foreach (var match in matches)
+            string jsonArray = ExtractBracketArray(html, startIndex);
+            if (string.IsNullOrEmpty(jsonArray))
+                return null;
+
+            return jsonArray
+                .Replace("\\'", "'")
+                .Replace("\\\"", "\"");
+        }
+
+        /// <summary>Finds the '[' opening the array assigned to the <c>file</c> key.</summary>
+        /// <returns>Index of that '[' in <paramref name="html"/>, or -1 if there is no such key.</returns>
+        private int FindFileArrayStart(string html)
+        {
+            // Require a real `file:` key; a bare "file" search also hits "profile"/"filename".
+            const RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.CultureInvariant;
+            var match = Regex.Match(html, @"\bfile['""]?\s*:\s*['""]?\s*(\[)", options);
+            return match.Success ? match.Groups[1].Index : -1;
+        }
+
+        private string ExtractBracketArray(string text, int startIndex)
+        {
+            if (startIndex < 0 || startIndex >= text.Length || text[startIndex] != '[')
+                return null;
+
+            int depth = 0;
+            for (int i = startIndex; i < text.Length; i++)
            {
-                if (match.Success)
+                char ch = text[i];
+                if (ch == '[')
+                    depth++;
+                else if (ch == ']')
                {
-                    return match.Groups[1].Value
-                        .Replace("\\'", "'")
-                        .Replace("\\\"", "\"");
+                    depth--;
+                    if (depth == 0)
+                        return text.Substring(startIndex, i - startIndex + 1);
                }
            }