using HtmlAgilityPack;
using System.Text.RegularExpressions;
namespace Shared.Engine
{
public class HtmlParse
{
public List nodes { get; private set; } = new List();
public HtmlParse(string html, string xpathNodes)
{
var doc = new HtmlDocument();
doc.LoadHtml(html);
var _nodes = doc.DocumentNode?.SelectNodes(xpathNodes);
if (_nodes == null)
return;
foreach (var node in _nodes)
nodes.Add(new HtmlRowParse(node));
}
public static List Nodes(string html, string xpathNodes)
{
return new HtmlParse(html, xpathNodes).nodes;
}
}
public class HtmlRowParse
{
public HtmlNode row { get; private set; }
public HtmlRowParse(HtmlNode node)
{
row = node;
}
#region SelectText
public string SelectText(string xpath, string attribute = null, string[] attributes = null)
{
string value = null;
if (string.IsNullOrEmpty(xpath) && (!string.IsNullOrEmpty(attribute) || attributes != null))
{
if (attributes != null)
{
foreach (var attr in attributes)
{
string attrValue = row.GetAttributeValue(attr, null);
if (!string.IsNullOrWhiteSpace(attrValue))
{
value = attrValue;
break;
}
}
}
else
{
value = row.GetAttributeValue(attribute, null);
}
}
else
{
var inNode = row.SelectSingleNode(xpath);
if (inNode != null)
{
if (attributes != null)
{
foreach (var attr in attributes)
{
string attrValue = inNode.GetAttributeValue(attr, null);
if (!string.IsNullOrWhiteSpace(attrValue))
{
value = attrValue;
break;
}
}
}
else
{
value = (!string.IsNullOrEmpty(attribute) ? inNode.GetAttributeValue(attribute, null) : inNode.InnerText);
}
}
}
if (string.IsNullOrWhiteSpace(value))
return null;
return value?.Trim();
}
#endregion
#region SelectHtml
public string SelectHtml(string xpath)
{
var inNode = row.SelectSingleNode(xpath);
if (inNode != null)
{
string html = inNode.InnerHtml;
if (string.IsNullOrWhiteSpace(html))
return null;
return inNode.InnerHtml;
}
return null;
}
#endregion
#region Regex
public string Regex(string xpath, string pattern, int index = 1, RegexOptions options = RegexOptions.IgnoreCase)
{
string html = SelectHtml(pattern);
if (string.IsNullOrWhiteSpace(html))
return null;
string res = System.Text.RegularExpressions.Regex.Match(html, pattern, options).Groups[index].Value;
if (string.IsNullOrWhiteSpace(res))
return null;
return res.Trim();
}
public string Regex(string xpath, string pattern, string groupName, RegexOptions options = RegexOptions.IgnoreCase)
{
string html = SelectHtml(pattern);
if (string.IsNullOrWhiteSpace(html))
return null;
string res = System.Text.RegularExpressions.Regex.Match(html, pattern, options).Groups[groupName].Value;
if (string.IsNullOrWhiteSpace(res))
return null;
return res.Trim();
}
public string Regex(string pattern, int index = 1, RegexOptions options = RegexOptions.IgnoreCase)
{
string res = System.Text.RegularExpressions.Regex.Match(row.InnerHtml, pattern, options).Groups[index].Value;
if (string.IsNullOrWhiteSpace(res))
return null;
return res.Trim();
}
public string Regex(string pattern, string groupName, RegexOptions options = RegexOptions.IgnoreCase)
{
string res = System.Text.RegularExpressions.Regex.Match(row.InnerHtml, pattern, options).Groups[groupName].Value;
if (string.IsNullOrWhiteSpace(res))
return null;
return res.Trim();
}
#endregion
}
}