|
// Looks up `query` on the English Wikipedia and leaves a short plain-text summary of the
// best-matching article in `text`:
//   1. ask the MediaWiki search API for the single best article title,
//   2. download that article's HTML page,
//   3. take the first non-empty paragraph of the article body,
//   4. strip reference markers such as "[1]" and shorten the text to about 500 characters,
//      cutting at a sentence boundary where possible.
// When nothing usable is found, or the page is a disambiguation page, `text` is set to a
// user-facing message instead of a summary.
string text = null;

// Percent-encode the query so characters such as '&', '#', '+' or '?' cannot corrupt the URL.
var url = $"https://en.wikipedia.org/w/api.php?action=query&list=search&srsearch={System.Uri.EscapeDataString(query)}&srlimit=1&srnamespace=0&format=json";
var response = await httpClient.GetAsync(url);
var jsonResult = await response.Content.ReadAsAsync<JObject>();

// SelectToken returns null when the search produced no hits, so every step is null-safe.
var title = jsonResult?.SelectToken("query.search[0].title")?.Value<string>();

var htmlContent = string.Empty;
if (!string.IsNullOrEmpty(title))
{
    // Wikipedia page URLs use '_' for spaces; the rest of the title is percent-encoded
    // so titles like "C#" or "AT&T" resolve to the right page.
    url = $"https://en.wikipedia.org/wiki/{System.Uri.EscapeDataString(title.Replace(" ", "_"))}";
    response = await httpClient.GetAsync(url);
    htmlContent = await response.Content.ReadAsStringAsync();
}

var pageDocument = new HtmlDocument();
pageDocument.LoadHtml(htmlContent);

// SelectNodes returns null (not an empty collection) when nothing matches.
var nodes = pageDocument.DocumentNode.SelectNodes("//div[contains(@id, 'mw-content-text')]//p");
if (nodes != null)
{
    // Skip blank placeholder paragraphs; FirstOrDefault avoids an exception when none qualify.
    text = nodes
        .FirstOrDefault(n => !string.IsNullOrWhiteSpace(n.InnerText)
                             && !n.InnerText.StartsWith("\n", System.StringComparison.Ordinal))
        ?.InnerText;
}

if (string.IsNullOrWhiteSpace(text))
{
    text = "Sorry, I could not find anything about that on Wikipedia.";
}
else
{
    // InnerText keeps HTML entities encoded (e.g. "&#91;1&#93;" for "[1]"), so decode first.
    text = System.Net.WebUtility.HtmlDecode(text);

    // Remove the reference markers such as "[1]" or "[123]". The brackets must be escaped:
    // an unescaped [\d{1,3}] is a character class that would delete every digit.
    text = Regex.Replace(text, @"\[\d{1,3}\]", "").Trim();

    // Drop trailing sentences until the summary fits into 500 characters.
    while (text.Length > 500)
    {
        var lastPeriod = text.LastIndexOf('.');
        if (lastPeriod <= 0)
        {
            // No sentence boundary left to cut at: fall back to a hard cut.
            text = text.Substring(0, 500).TrimEnd();
            break;
        }

        text = text.Remove(lastPeriod);
    }

    if (!text.EndsWith(".", System.StringComparison.Ordinal))
    {
        text += ".";
    }

    // Disambiguation pages open with "<term> may refer to:" and carry no real summary.
    if (text.Contains("may refer to:"))
    {
        text = "Your search is a bit ambiguous, could you specify your search terms?";
    }
}
|