mirror of
https://github.com/lampame/lampac-ukraine.git
synced 2026-04-16 17:32:20 +00:00
refactor(makhno): improve file array extraction logic
Replace regex-based file array extraction with a more robust bracket matching algorithm that properly handles nested structures. The new implementation uses manual parsing to track bracket depth, ensuring correct extraction of JSON arrays from HTML content.
This commit is contained in:
parent
ed7bfa67de
commit
6208c14ef0
@ -366,20 +366,45 @@ namespace Makhno
|
||||
if (string.IsNullOrEmpty(html))
|
||||
return null;
|
||||
|
||||
var matches = new[]
|
||||
{
|
||||
Regex.Match(html, @"file\s*:\s*'(\[.*\])'", RegexOptions.Singleline),
|
||||
Regex.Match(html, @"file\s*:\s*""(\[.*\])""", RegexOptions.Singleline),
|
||||
Regex.Match(html, @"file\s*:\s*(\[[\s\S]*?\])", RegexOptions.Singleline)
|
||||
};
|
||||
var startIndex = FindFileArrayStart(html);
|
||||
if (startIndex < 0)
|
||||
return null;
|
||||
|
||||
foreach (var match in matches)
|
||||
string jsonArray = ExtractBracketArray(html, startIndex);
|
||||
if (string.IsNullOrEmpty(jsonArray))
|
||||
return null;
|
||||
|
||||
return jsonArray
|
||||
.Replace("\\'", "'")
|
||||
.Replace("\\\"", "\"");
|
||||
}
|
||||
|
||||
/// <summary>
/// Locates the opening '[' of the array assigned to a <c>file</c> key inside <paramref name="html"/>.
/// </summary>
/// <remarks>
/// A candidate is accepted only when the text reads: <c>file</c>, an optional quote closing a quoted key,
/// optional whitespace, ':', optional whitespace, an optional opening quote, and then '['.
/// Every occurrence of <c>file</c> is tried in order, so an earlier unrelated occurrence
/// (for example inside "profile" or "filename") no longer hijacks the search and makes the
/// method return some unrelated bracket further down the page.
/// The key comparison is case-insensitive (ordinal).
/// </remarks>
/// <param name="html">Page markup or script text to scan; may be null or empty.</param>
/// <returns>
/// Zero-based index of the '[' that starts the array, or -1 when no <c>file: [</c> assignment is found.
/// </returns>
private int FindFileArrayStart(string html)
{
    if (string.IsNullOrEmpty(html))
        return -1;

    const string key = "file";
    int searchFrom = 0;

    while (true)
    {
        int keyIndex = html.IndexOf(key, searchFrom, StringComparison.OrdinalIgnoreCase);
        if (keyIndex < 0)
            return -1;

        int cursor = keyIndex + key.Length;

        // JSON-style keys are quoted ("file": ...); step over the closing quote of the key.
        if (cursor < html.Length && (html[cursor] == '\'' || html[cursor] == '"'))
            cursor++;

        while (cursor < html.Length && char.IsWhiteSpace(html[cursor]))
            cursor++;

        if (cursor < html.Length && html[cursor] == ':')
        {
            cursor++;

            while (cursor < html.Length && char.IsWhiteSpace(html[cursor]))
                cursor++;

            // The array may be embedded in a string literal: file:'[...]' or file:"[...]".
            if (cursor < html.Length && (html[cursor] == '\'' || html[cursor] == '"'))
                cursor++;

            if (cursor < html.Length && html[cursor] == '[')
                return cursor;
        }

        // Not a "file: [" assignment; resume just past the start of this occurrence.
        searchFrom = keyIndex + 1;
    }
}
|
||||
|
||||
private string ExtractBracketArray(string text, int startIndex)
|
||||
{
|
||||
if (startIndex < 0 || startIndex >= text.Length || text[startIndex] != '[')
|
||||
return null;
|
||||
|
||||
int depth = 0;
|
||||
for (int i = startIndex; i < text.Length; i++)
|
||||
{
|
||||
if (match.Success)
|
||||
char ch = text[i];
|
||||
if (ch == '[')
|
||||
depth++;
|
||||
else if (ch == ']')
|
||||
{
|
||||
return match.Groups[1].Value
|
||||
.Replace("\\'", "'")
|
||||
.Replace("\\\"", "\"");
|
||||
depth--;
|
||||
if (depth == 0)
|
||||
return text.Substring(startIndex, i - startIndex + 1);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user