Add basic HTML parsing
This commit is contained in:
parent
387909b0c6
commit
a7250ceacf
6
.gitignore
vendored
6
.gitignore
vendored
@ -2,4 +2,8 @@
|
|||||||
/quests
|
/quests
|
||||||
/rust
|
/rust
|
||||||
/obj
|
/obj
|
||||||
/bin
|
/bin
|
||||||
|
/cache
|
||||||
|
omnisharp.json
|
||||||
|
thread_*.json
|
||||||
|
*.js
|
@ -1,6 +1,7 @@
|
|||||||
namespace QuestReader;
|
namespace QuestReader.Models;
|
||||||
|
|
||||||
using System.Text.Json.Serialization;
|
using System.Text.Json.Serialization;
|
||||||
|
using QuestReader.Models.ParsedContent;
|
||||||
|
|
||||||
public record ThreadPost
|
public record ThreadPost
|
||||||
{
|
{
|
||||||
@ -8,6 +9,7 @@ public record ThreadPost
|
|||||||
public string Author { get; set; }
|
public string Author { get; set; }
|
||||||
public string Uid { get; set; }
|
public string Uid { get; set; }
|
||||||
public string RawHtml { get; set; }
|
public string RawHtml { get; set; }
|
||||||
|
public ParsedContent.ParsedContent? ParsedContent { get; set; }
|
||||||
public string? File { get; set; }
|
public string? File { get; set; }
|
||||||
public string? Filename { get; set; }
|
public string? Filename { get; set; }
|
||||||
public string? Title { get; set; }
|
public string? Title { get; set; }
|
||||||
@ -42,29 +44,4 @@ public record ChapterMetadata
|
|||||||
public int Start { get; set; }
|
public int Start { get; set; }
|
||||||
public int? Announce { get; set; }
|
public int? Announce { get; set; }
|
||||||
public int End { get; set; }
|
public int End { get; set; }
|
||||||
}
|
|
||||||
|
|
||||||
public enum ParamType
|
|
||||||
{
|
|
||||||
Invalid,
|
|
||||||
PostId,
|
|
||||||
UniqueId,
|
|
||||||
Username
|
|
||||||
}
|
|
||||||
|
|
||||||
public enum ParamError
|
|
||||||
{
|
|
||||||
Invalid,
|
|
||||||
NoError,
|
|
||||||
NotFound
|
|
||||||
}
|
|
||||||
|
|
||||||
public class TemplateModel
|
|
||||||
{
|
|
||||||
public Metadata Metadata { get; set; }
|
|
||||||
public DateTime Now { get; set; }
|
|
||||||
public List<ThreadPost> Posts { get; set; }
|
|
||||||
public List<ThreadPost> AllPosts { get; set; }
|
|
||||||
public string BaseUrl { get; set; }
|
|
||||||
public string ToolVersion { get; set; }
|
|
||||||
}
|
}
|
123
Models/ParsedContent.cs
Normal file
123
Models/ParsedContent.cs
Normal file
@ -0,0 +1,123 @@
|
|||||||
|
using System.Linq.Expressions;
|
||||||
|
using System.Runtime.Serialization;
|
||||||
|
using System.Text.Json;
|
||||||
|
using System.Text.Json.Serialization;
|
||||||
|
using System.Web;
|
||||||
|
|
||||||
|
namespace QuestReader.Models.ParsedContent;
|
||||||
|
|
||||||
|
/// <summary>
/// Structured form of a post body: a flat list of top-level content nodes
/// plus the version stamp assigned when the content was produced.
/// </summary>
public class ParsedContent
|
||||||
|
{
|
||||||
|
// Version stamp; QuestdenParse.ParseContent sets it to its LatestCompatibleVersion.
public Version Version { get; set; }
|
||||||
|
// Top-level nodes of the post body (the children of the temporary RootNode).
public IList<ContentNode> Nodes { get; set; }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// JSON converter for the <see cref="ContentNode"/> hierarchy.
/// Writing serializes each node as its runtime type so derived-class properties are
/// emitted; reading picks the concrete class from the node's <c>Type</c> discriminator
/// (the class name without the <c>Node</c> suffix).
/// </summary>
class ContentConverter : JsonConverter<ContentNode>
{
    /// <summary>
    /// Reads one content node object and materializes the matching concrete node class.
    /// </summary>
    /// <exception cref="JsonException">
    /// The value is not an object, has no string discriminator, or names a type that is
    /// not a concrete, usable <see cref="ContentNode"/>.
    /// </exception>
    public override ContentNode Read(
        ref Utf8JsonReader reader,
        Type typeToConvert,
        JsonSerializerOptions options
    )
    {
        using var document = JsonDocument.ParseValue(ref reader);
        var element = document.RootElement;
        if (element.ValueKind != JsonValueKind.Object)
            throw new JsonException($"Expected a content node object but found {element.ValueKind}");

        // The discriminator is matched case-insensitively so it works with or without
        // a camel-case naming policy.
        string? discriminator = null;
        foreach (var property in element.EnumerateObject())
        {
            if (property.Value.ValueKind == JsonValueKind.String
                && property.Name.Equals(nameof(ContentNode.Type), StringComparison.OrdinalIgnoreCase))
            {
                discriminator = property.Value.GetString();
                break;
            }
        }
        if (string.IsNullOrEmpty(discriminator))
            throw new JsonException("Content node has no type discriminator");

        // Only concrete ContentNode subclasses from the node namespace are accepted;
        // RootNode is a parser-internal container and is rejected like on write.
        var concreteType = typeof(ContentNode).Assembly
            .GetType($"{typeof(ContentNode).Namespace}.{discriminator}Node");
        if (concreteType is null
            || concreteType.IsAbstract
            || concreteType == typeof(RootNode)
            || !typeof(ContentNode).IsAssignableFrom(concreteType))
        {
            throw new JsonException($"Unknown content node type \"{discriminator}\"");
        }

        // Nested ContentNode values (e.g. ContainerNode.Nodes) come back through this converter.
        return (ContentNode?)element.Deserialize(concreteType, options)
            ?? throw new JsonException($"Content node of type \"{discriminator}\" deserialized to null");
    }

    /// <summary>
    /// Writes a node using its runtime type so that subclass properties are included.
    /// </summary>
    /// <exception cref="InvalidDataContractException">The node is a <see cref="RootNode"/>.</exception>
    public override void Write(
        Utf8JsonWriter writer,
        ContentNode value,
        JsonSerializerOptions options)
    {
        if (value is null)
        {
            writer.WriteNullValue();
            return;
        }

        if (value is RootNode)
            throw new InvalidDataContractException("RootNode must not be used");

        JsonSerializer.Serialize(writer, value, value.GetType(), options);
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Base class of every parsed content node. Serialized through <see cref="ContentConverter"/>.
/// </summary>
[JsonConverter(typeof(ContentConverter))]
public abstract class ContentNode
{
    /// <summary>Node kind: the runtime class name with every "Node" occurrence removed.</summary>
    public string Type => GetType().Name.Replace("Node", string.Empty);

    /// <summary>
    /// Produces the HTML for this node. The base implementation always throws;
    /// node types that can be rendered override it.
    /// </summary>
    /// <exception cref="NotImplementedException">The node type does not override rendering.</exception>
    public virtual string Render(TemplateModel model) =>
        throw new NotImplementedException("Rendering is not supported for this node type");
}
|
||||||
|
|
||||||
|
/// <summary>Leaf node carrying a run of plain text.</summary>
public class TextNode : ContentNode
{
    public string Text { get; set; }

    /// <summary>Debug representation: the text wrapped in double quotes.</summary>
    public override string ToString()
    {
        return "\"" + Text + "\"";
    }

    /// <summary>Returns the text HTML-encoded.</summary>
    public override string Render(TemplateModel model)
    {
        return HttpUtility.HtmlEncode(Text);
    }
}
|
||||||
|
|
||||||
|
/// <summary>Leaf node representing a line break.</summary>
public class NewlineNode : ContentNode
{
    // Shared by the debug representation and the rendered output.
    private const string LineBreak = "<br>";

    public override string ToString()
    {
        return LineBreak;
    }

    public override string Render(TemplateModel model)
    {
        return LineBreak;
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Leaf node for a link that points at another post of a thread.
/// </summary>
public class ReferenceNode : ContentNode
|
||||||
|
{
|
||||||
|
// Id of the referenced post.
public int PostId { get; set; }
|
||||||
|
// Id of the thread that contains the referenced post.
public int ThreadId { get; set; }
|
||||||
|
// Which board the reference points at (see ReferenceType).
public ReferenceType ReferenceType { get; set; }
|
||||||
|
// True when the link was written as a full URL; false for the short class-tagged form.
public bool LongReference { get; set; }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// <summary>Board a <c>ReferenceNode</c> points at.</summary>
public enum ReferenceType
{
    /// <summary>The "quest" board.</summary>
    QuestActive = 0,

    /// <summary>The "questarch" board.</summary>
    QuestArchive = 1,

    /// <summary>The "questdis" board.</summary>
    QuestDiscussion = 2,
}
|
||||||
|
|
||||||
|
/// <summary>Base class for nodes that wrap a list of child nodes.</summary>
public abstract class ContainerNode : ContentNode
{
    public IList<ContentNode> Nodes { get; set; }

    /// <summary>Debug representation: the node type followed by its children, one per line.</summary>
    public override string ToString()
    {
        var children = string.Join(",\n", Nodes);
        return Type + " [ " + children + " ]";
    }
}
|
||||||
|
|
||||||
|
// Temporary container that exists only while a post body is being parsed recursively.
// It MUST NOT be used anywhere else, so printing or rendering it always throws.
public class RootNode : ContainerNode
{
    public override string ToString()
    {
        throw new InvalidDataContractException("RootNode must not be used");
    }

    public override string Render(TemplateModel model)
    {
        throw new InvalidDataContractException("RootNode must not be used");
    }
}
|
||||||
|
|
||||||
|
// Formatting containers: each one only tags its children with a presentation kind.

/// <summary>Quoted text.</summary>
public class QuoteNode : ContainerNode
{
}

/// <summary>Bold text.</summary>
public class BoldNode : ContainerNode
{
}

/// <summary>Italic text.</summary>
public class ItalicsNode : ContainerNode
{
}

/// <summary>Struck-out text.</summary>
public class StrikeoutNode : ContainerNode
{
}

/// <summary>Spoiler text.</summary>
public class SpoilerNode : ContainerNode
{
}

/// <summary>Monospace / code text.</summary>
public class InlineCodeNode : ContainerNode
{
}

/// <summary>Underlined text.</summary>
public class UnderlineNode : ContainerNode
{
}
|
||||||
|
|
||||||
|
/// <summary>
/// Container node for a hyperlink that is not a post reference; the link's
/// content is held in the inherited child node list.
/// </summary>
public class ExternalLinkNode : ContainerNode
|
||||||
|
{
|
||||||
|
// Value of the link's href attribute as found in the source HTML.
public string Destination { get; set; }
|
||||||
|
}
|
11
Models/Template.cs
Normal file
11
Models/Template.cs
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
namespace QuestReader.Models;
|
||||||
|
|
||||||
|
/// <summary>
/// Data handed to the page template when a quest is rendered.
/// </summary>
public class TemplateModel
|
||||||
|
{
|
||||||
|
// Quest metadata; Generator.Run fills it from PostsSource.Metadata.
public Metadata Metadata { get; set; }
|
||||||
|
// Timestamp made available to the template — presumably generation time; confirm against Generator.
public DateTime Now { get; set; }
|
||||||
|
// Posts to render; Generator.Run fills it from PostsSource.Accepted.
public List<ThreadPost> Posts { get; set; }
|
||||||
|
// NOTE(review): looks like the unfiltered post list — confirm against Generator.
public List<ThreadPost> AllPosts { get; set; }
|
||||||
|
public string BaseUrl { get; set; }
|
||||||
|
public string ToolVersion { get; set; }
|
||||||
|
}
|
@ -1,6 +1,7 @@
|
|||||||
namespace QuestReader.Services;
|
namespace QuestReader.Services;
|
||||||
|
|
||||||
using System.Reflection;
|
using System.Reflection;
|
||||||
|
using QuestReader.Models;
|
||||||
|
|
||||||
public class Generator
|
public class Generator
|
||||||
{
|
{
|
||||||
@ -21,7 +22,8 @@ public class Generator
|
|||||||
|
|
||||||
var chapterAnnounces = PostsSource.Metadata.Chapters.Select(c => c.Announce ?? c.Start);
|
var chapterAnnounces = PostsSource.Metadata.Chapters.Select(c => c.Announce ?? c.Start);
|
||||||
|
|
||||||
PostsSource.Accepted.Where(p => chapterAnnounces.Contains(p.Id)).ToList().ForEach(p => {
|
PostsSource.Accepted.Where(p => chapterAnnounces.Contains(p.Id)).ToList().ForEach(p =>
|
||||||
|
{
|
||||||
p.IsChapterAnnounce = true;
|
p.IsChapterAnnounce = true;
|
||||||
p.Chapter = PostsSource.Metadata.Chapters.Single(c => (c.Announce ?? c.Start) == p.Id);
|
p.Chapter = PostsSource.Metadata.Chapters.Single(c => (c.Announce ?? c.Start) == p.Id);
|
||||||
});
|
});
|
||||||
@ -38,7 +40,7 @@ public class Generator
|
|||||||
|
|
||||||
public string Run()
|
public string Run()
|
||||||
{
|
{
|
||||||
RazorTemplate.Model = new TemplateModel
|
RazorTemplate.Model = new TemplateModel
|
||||||
{
|
{
|
||||||
Metadata = PostsSource.Metadata,
|
Metadata = PostsSource.Metadata,
|
||||||
Posts = PostsSource.Accepted,
|
Posts = PostsSource.Accepted,
|
||||||
|
@ -3,6 +3,7 @@ namespace QuestReader.Services;
|
|||||||
using System.Text.Json;
|
using System.Text.Json;
|
||||||
using System.Text.Json.Serialization;
|
using System.Text.Json.Serialization;
|
||||||
using System.Text.RegularExpressions;
|
using System.Text.RegularExpressions;
|
||||||
|
using QuestReader.Models;
|
||||||
|
|
||||||
public class PostsSource
|
public class PostsSource
|
||||||
{
|
{
|
||||||
@ -27,7 +28,8 @@ public class PostsSource
|
|||||||
fileStream.Dispose();
|
fileStream.Dispose();
|
||||||
|
|
||||||
Console.Out.WriteLine($"Loaded metadata: {Metadata}");
|
Console.Out.WriteLine($"Loaded metadata: {Metadata}");
|
||||||
Posts = Metadata.Threads.SelectMany(tId => {
|
Posts = Metadata.Threads.SelectMany(tId =>
|
||||||
|
{
|
||||||
using var fileStream = File.OpenRead(Path.Combine(basePath, $"thread_{tId}.json"));
|
using var fileStream = File.OpenRead(Path.Combine(basePath, $"thread_{tId}.json"));
|
||||||
var threadData = JsonSerializer.Deserialize<List<ThreadPost>>(fileStream, options)
|
var threadData = JsonSerializer.Deserialize<List<ThreadPost>>(fileStream, options)
|
||||||
?? throw new InvalidDataException("Empty deserialisation result for thread data");
|
?? throw new InvalidDataException("Empty deserialisation result for thread data");
|
||||||
|
269
Services/QuestdenParse.cs
Normal file
269
Services/QuestdenParse.cs
Normal file
@ -0,0 +1,269 @@
|
|||||||
|
using System.Globalization;
|
||||||
|
using System.Text.Json;
|
||||||
|
using System.Text.Json.Serialization;
|
||||||
|
using System.Text.RegularExpressions;
|
||||||
|
using System.Web;
|
||||||
|
using HtmlAgilityPack;
|
||||||
|
using QuestReader.Models;
|
||||||
|
using QuestReader.Models.ParsedContent;
|
||||||
|
|
||||||
|
namespace QuestReader.Services;
|
||||||
|
|
||||||
|
public class QuestdenParse
|
||||||
|
{
|
||||||
|
// Version stamped onto every ParsedContent this parser produces.
static readonly Version LatestCompatibleVersion = new(1, 0, 2);

// CSS class of a short post reference: ref|<board>|<thread id>|<post id>.
static Regex RefRegex { get; } = new Regex(@"^ref\|(questarch|questdis|quest)\|(\d+)\|(\d+)$", RegexOptions.Compiled);

// Full thread/post URL. Groups: 1 = "www.", 2 = site, 3 = board, 4 = thread id, 5 = optional post id.
// The dots are escaped so that only literal '.' matches in "www.", ".org" and ".html".
static Regex LongRefRegex { get; } = new Regex(@"(?:https?://)?(www\.)?(tgchan|questden)\.org/kusaba/(questarch|questdis|quest)/res/(\d+)\.html#?i?(\d+)?$", RegexOptions.Compiled);

// Post timestamp as shown on the page: yyyy/MM/dd(<weekday>)HH:mm.
static Regex DateRegex { get; } = new Regex(@"(\d{4,4})\/(\d\d)\/(\d\d)\(\w+\)(\d\d):(\d\d)", RegexOptions.Compiled);

// File info line; group 1 is the original file name.
static Regex FilenameRegex { get; } = new Regex(@"File \d+\.[^ ]+ - \([\d\.KMG]+B , \d+x\d+ , (.*) \)", RegexOptions.Compiled);
|
||||||
|
|
||||||
|
/// <summary>
/// Downloads (or loads from the local cache) the HTML page of a thread, parses every
/// post in it and writes the result to <c>thread_{threadId}.json</c>.
/// Does nothing when that JSON file already exists.
/// </summary>
/// <param name="threadId">Id of the thread on the "quest" board.</param>
public static async Task GetThread(int threadId)
{
    var outputFile = $"thread_{threadId}.json";
    if (File.Exists(outputFile))
        return;

    var cacheFile = $"cache/QuestDen-{threadId}.html";
    string html;
    if (File.Exists(cacheFile))
    {
        html = await File.ReadAllTextAsync(cacheFile);
    }
    else
    {
        var url = $"http://questden.org/kusaba/quest/res/{threadId}.html";

        // Dispose the client once the single request is done so its handler is not leaked.
        using var httpClient = new HttpClient();
        html = await httpClient.GetStringAsync(url);

        // CreateDirectory is a no-op when the directory already exists.
        Directory.CreateDirectory("cache");
        await File.WriteAllTextAsync(cacheFile, html);
    }

    // OptionEmptyCollection makes SelectNodes return an empty collection instead of
    // null, which the optional-field lookups in ParsePost rely on.
    var doc = new HtmlDocument { OptionEmptyCollection = true };
    doc.LoadHtml(html);

    // Every reply plus the "delform" form element.
    var posts = doc.DocumentNode
        .SelectNodes(".//*[@class='reply']|.//form[@id='delform']")
        .Select(node => ParsePost(node, threadId))
        .ToList();

    var options = new JsonSerializerOptions
    {
        PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        WriteIndented = true
    };
    await File.WriteAllTextAsync(outputFile, JsonSerializer.Serialize(posts, options));
}
|
||||||
|
/// <summary>
/// Parses a post from its HTML source; the first element in the markup is taken as the post.
/// </summary>
/// <param name="postHtml">Outer HTML of a single post.</param>
/// <param name="threadId">Id of the thread the post belongs to.</param>
/// <exception cref="InvalidDataException">The markup contains no element.</exception>
public static ThreadPost ParsePost(string postHtml, int threadId)
{
    // Without OptionEmptyCollection SelectNodes returns null for "no match", which
    // breaks the optional-field lookups (title, file, tripcode) in the node overload.
    var htmlDoc = new HtmlDocument { OptionEmptyCollection = true };
    htmlDoc.LoadHtml(postHtml);

    // Skip leading whitespace/comment nodes instead of blindly taking FirstChild.
    var postNode = htmlDoc.DocumentNode.ChildNodes
        .FirstOrDefault(child => child.NodeType == HtmlNodeType.Element)
        ?? throw new InvalidDataException("Post HTML contains no element to parse");

    return ParsePost(postNode, threadId);
}
|
||||||
|
|
||||||
|
/// <summary>
/// Extracts a <see cref="ThreadPost"/> (id, title, author, uid, date, attachment,
/// tripcode, raw and parsed body) from a single post element.
/// </summary>
/// <param name="postNode">The post element (a reply or the thread form).</param>
/// <param name="threadId">Id of the thread; used as the post id if the anchor name is "s".</param>
/// <exception cref="InvalidOperationException">A mandatory element is missing or duplicated.</exception>
public static ThreadPost ParsePost(HtmlNode postNode, int threadId)
{
    // SelectNodes returns null for "no match" unless the owning document has
    // OptionEmptyCollection set; normalise to an empty sequence so Single /
    // SingleOrDefault behave the same either way.
    IEnumerable<HtmlNode> Find(string xpath) =>
        (IEnumerable<HtmlNode>?)postNode.SelectNodes(xpath) ?? Enumerable.Empty<HtmlNode>();

    var post = new ThreadPost { };

    var id = Find("./div[@class='postwidth']/a[@name!='s']")
        .Single()
        .Attributes["name"].Value.Trim();
    post.Id = id == "s" ? threadId : int.Parse(id, CultureInfo.InvariantCulture);

    post.Title = Find("./div[@class='postwidth']//*[@class='filetitle']")
        .SingleOrDefault()
        ?.InnerText.Trim();

    post.Author = Find("./div[@class='postwidth']//*[@class='postername']")
        .Single()
        .InnerText.Trim();

    post.Uid = Find("./div[@class='postwidth']//*[@class='uid']")
        .Single()
        .InnerText.Trim()
        .Replace("ID: ", "", true, CultureInfo.InvariantCulture);

    // The page shows "yyyy/MM/dd(Day)HH:mm"; rewrite it to ISO 8601 and parse it
    // culture-independently as UTC.
    var rawDate = Find("./div[@class='postwidth']/label/text()[last()]")
        .Single()
        .InnerText.Trim();
    post.Date = DateTime.Parse(
        DateRegex.Replace(rawDate, "$1-$2-$3T$4:$5"),
        CultureInfo.InvariantCulture,
        DateTimeStyles.AssumeUniversal | DateTimeStyles.AdjustToUniversal
    );

    // The link may lack an href attribute, hence the second null-conditional.
    post.File = Find("./div[@class='postwidth']//*[@class='filesize']/a")
        .SingleOrDefault()
        ?.Attributes["href"]?.Value.Trim();

    var filenameRaw = Find("./div[@class='postwidth']//*[@class='filesize']")
        .SingleOrDefault()
        ?.InnerText.Trim();
    if (filenameRaw is not null)
    {
        filenameRaw = Regex.Replace(filenameRaw, @"\s*\n\s*", " ");

        // Regex.Match never returns null; a failed match must yield null, not "".
        var filenameMatch = FilenameRegex.Match(filenameRaw);
        post.Filename = filenameMatch.Success ? filenameMatch.Groups[1].Value : null;
    }

    post.Tripcode = Find("./div[@class='postwidth']//*[@class='postertrip']")
        .SingleOrDefault()
        ?.InnerText.Trim();

    // Body HTML without the layout spacer div and without a trailing <br>.
    post.RawHtml = Regex.Replace(
        Find("./blockquote")
            .Single()
            .InnerHtml
            .Replace("\r", " ")
            .Replace(@"<div style=""display:inline-block; width:400px;""></div><br>", "")
            .Trim(),
        @"\s*<br\s*\/?>$",
        ""
    );

    try
    {
        post.ParsedContent = ParseContent(post.RawHtml);
    }
    catch (Exception ex) when (ex is FormatException or InvalidDataException)
    {
        // Dump the offending post before propagating; unknown markup surfaces as
        // InvalidDataException, malformed numbers as FormatException.
        Console.WriteLine($"\n{post.Id} {post.RawHtml.Replace("\r", "")}\n");
        throw;
    }

    return post;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Parses the HTML of a post body into a <see cref="ParsedContent"/> tree.
/// </summary>
/// <param name="postHtml">Inner HTML of the post body.</param>
/// <exception cref="Exception">The recursive parse did not produce a root container.</exception>
public static ParsedContent ParseContent(string postHtml)
{
    var document = new HtmlDocument();
    document.LoadHtml(postHtml);

    // The document node maps to a temporary RootNode; only its children are kept.
    if (RecursiveParse(document.DocumentNode) is not RootNode root)
        throw new Exception("Parsing returned a non-RootNode root");

    var content = new ParsedContent();
    content.Version = LatestCompatibleVersion;
    content.Nodes = root.Nodes;
    return content;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Converts an HTML node (and, for containers, all of its children) into a content node.
/// </summary>
/// <param name="node">Text, element or document node to convert.</param>
/// <param name="parentNode">Container being filled; currently not consulted.</param>
/// <exception cref="InvalidDataException">
/// The node is of an unsupported kind or is an element the parser does not recognise.
/// </exception>
private static ContentNode RecursiveParse(HtmlNode node, ContentNode? parentNode = null)
{
    if (node is null)
        throw new ArgumentNullException(nameof(node), "Html node is null");

    if (node is HtmlTextNode plainText)
        return new TextNode { Text = HttpUtility.HtmlDecode(plainText.Text.Trim()) };

    if (node.NodeType is not (HtmlNodeType.Document or HtmlNodeType.Element))
        throw new InvalidDataException("Unsupported HTML node type");

    ContentNode outNode = node.Name.ToLowerInvariant() switch
    {
        // Short reference: <a class="ref|board|thread|post">>>post</a> (or >>/board/post).
        "a" when
            node.GetClasses().Count() == 1
            && node.FirstChild?.NodeType == HtmlNodeType.Text
            && node.Descendants().Count() == 1
            && RefRegex.Match(node.GetClasses().Single()) is { Success: true } shortRef
            && HttpUtility.HtmlDecode(node.FirstChild.InnerText) is { } label
            && (label == $">>{shortRef.Groups[3].Value}"
                || label == $">>/{shortRef.Groups[1].Value}/{shortRef.Groups[3].Value}")
            => new ReferenceNode
            {
                PostId = int.Parse(shortRef.Groups[3].Value, CultureInfo.InvariantCulture),
                ThreadId = int.Parse(shortRef.Groups[2].Value, CultureInfo.InvariantCulture),
                ReferenceType = ParseReferenceType(shortRef.Groups[1].Value),
                LongReference = false
            },

        // Long reference: a class-less link whose text equals its href and is a thread/post URL.
        "a" when
            !node.GetClasses().Any()
            && node.FirstChild is HtmlTextNode linkText
            && node.Descendants().Count() == 1
            && HttpUtility.HtmlDecode(linkText.Text) is { } linkTarget
            && node.GetAttributeValue("href", "ERROR") == linkTarget
            && LongRefRegex.Match(linkTarget) is { Success: true } longRef
            => new ReferenceNode
            {
                // Without a "#post" fragment the link targets the thread's first post.
                PostId = int.Parse(
                    longRef.Groups[5].Success ? longRef.Groups[5].Value : longRef.Groups[4].Value,
                    CultureInfo.InvariantCulture),
                ThreadId = int.Parse(longRef.Groups[4].Value, CultureInfo.InvariantCulture),
                // The board is captured by the URL pattern; previously it was dropped and
                // every long reference defaulted to QuestActive.
                ReferenceType = ParseReferenceType(longRef.Groups[3].Value),
                LongReference = true
            },

        "a" when !node.GetClasses().Any()
            => new ExternalLinkNode { Destination = node.GetAttributeValue("href", "ERROR") },

        "br" => new NewlineNode { },
        "#document" => new RootNode { },
        "i" => new ItalicsNode { },
        "b" => new BoldNode { },
        "strike" => new StrikeoutNode { },
        "span" when HasSingleClass(node, "spoiler") => new SpoilerNode { },
        "span" when HasSingleClass(node, "unkfunc") => new QuoteNode { },
        "span" when HasOnlyStyle(node, @"border-bottom: 1px solid") => new UnderlineNode { },

        // A span whose only meaningful descendant is a YouTube iframe becomes a text placeholder.
        "span" when
            node.Descendants()
                .Where(d => d is not HtmlTextNode inner || !string.IsNullOrWhiteSpace(inner.Text))
                .ToList() is { Count: 1 } meaningful
            && meaningful[0] is { Name: "iframe" } frame
            // An iframe without src yields "" here instead of a null dereference.
            && frame.GetAttributeValue("src", string.Empty) is var source
            && source.Contains("youtube")
            => new TextNode { Text = $"Here be youtube link {source}" },

        "div" when HasOnlyStyle(node, @"white-space: pre-wrap !important; font-family: monospace, monospace !important;")
            => new InlineCodeNode { },

        _ => throw new InvalidDataException($"Unknown node parse attempt: {node.Name} #{node.Id} .{string.Join(".", node.GetClasses())}\n{node.OuterHtml}")
    };

    if (outNode is ContainerNode container)
    {
        // Parse the children and drop text nodes that are empty or whitespace only.
        container.Nodes = node.ChildNodes
            .Select(child => RecursiveParse(child, container))
            .Where(parsed => parsed is not TextNode parsedText || !string.IsNullOrWhiteSpace(parsedText.Text))
            .ToList();
    }

    return outNode;
}

// Maps the board name used in reference classes and URLs to its ReferenceType.
private static ReferenceType ParseReferenceType(string board) => board switch
{
    "quest" => ReferenceType.QuestActive,
    "questarch" => ReferenceType.QuestArchive,
    "questdis" => ReferenceType.QuestDiscussion,
    _ => throw new InvalidDataException($"Unknown board in post reference: {board}"),
};

// True when the element has exactly one CSS class and it equals className.
private static bool HasSingleClass(HtmlNode node, string className)
{
    var classes = node.GetClasses();
    return classes.Count() == 1 && classes.Single() == className;
}

// True when the element's only attribute is a style attribute with exactly this value.
private static bool HasOnlyStyle(HtmlNode node, string style)
{
    var attributes = node.GetAttributes();
    return attributes.Count() == 1
        && attributes.Single() is var only
        && only.Name == "style"
        && only.DeEntitizeValue == style;
}
|
||||||
|
}
|
@ -1,3 +1,5 @@
|
|||||||
|
namespace QuestReader.Services;
|
||||||
|
|
||||||
using System.Reflection;
|
using System.Reflection;
|
||||||
using System.Text;
|
using System.Text;
|
||||||
using Microsoft.AspNetCore.Razor.Language;
|
using Microsoft.AspNetCore.Razor.Language;
|
||||||
@ -6,8 +8,6 @@ using Microsoft.CodeAnalysis;
|
|||||||
using Microsoft.CodeAnalysis.CSharp;
|
using Microsoft.CodeAnalysis.CSharp;
|
||||||
using Microsoft.CodeAnalysis.Emit;
|
using Microsoft.CodeAnalysis.Emit;
|
||||||
|
|
||||||
namespace QuestReader;
|
|
||||||
|
|
||||||
public class RazorStandalone<TTemplate>
|
public class RazorStandalone<TTemplate>
|
||||||
{
|
{
|
||||||
RazorProjectEngine Engine { get; set; }
|
RazorProjectEngine Engine { get; set; }
|
||||||
@ -63,7 +63,7 @@ public class RazorStandalone<TTemplate>
|
|||||||
}
|
}
|
||||||
|
|
||||||
var asm = Assembly.Load(memoryStream.ToArray());
|
var asm = Assembly.Load(memoryStream.ToArray());
|
||||||
var templateInstance = (TTemplate?) Activator.CreateInstance(asm.GetType("QuestReader.Template"));
|
var templateInstance = (TTemplate?)Activator.CreateInstance(asm.GetType("QuestReader.Template"));
|
||||||
if (templateInstance is null)
|
if (templateInstance is null)
|
||||||
throw new Exception("Template is null");
|
throw new Exception("Template is null");
|
||||||
|
|
||||||
@ -127,7 +127,7 @@ public abstract class StandaloneTemplate<TModel>
|
|||||||
await Output.WriteAsync(literal);
|
await Output.WriteAsync(literal);
|
||||||
}
|
}
|
||||||
|
|
||||||
string? Suffix {get;set;}
|
string? Suffix { get; set; }
|
||||||
|
|
||||||
public async Task BeginWriteAttributeAsync(
|
public async Task BeginWriteAttributeAsync(
|
||||||
string name,
|
string name,
|
||||||
@ -145,7 +145,8 @@ public abstract class StandaloneTemplate<TModel>
|
|||||||
await WriteAsync(value);
|
await WriteAsync(value);
|
||||||
}
|
}
|
||||||
|
|
||||||
public async Task EndWriteAttributeAsync() {
|
public async Task EndWriteAttributeAsync()
|
||||||
|
{
|
||||||
await WriteLiteralAsync(Suffix!);
|
await WriteLiteralAsync(Suffix!);
|
||||||
Suffix = null;
|
Suffix = null;
|
||||||
}
|
}
|
65
kusaba.js
65
kusaba.js
@ -1,65 +0,0 @@
|
|||||||
await (async () => {
|
|
||||||
delete Array.prototype.toJSON;
|
|
||||||
|
|
||||||
const processReply = (elem) => {
|
|
||||||
const id = +elem.querySelector(":scope > .postwidth > a[name]:not([name=s])").getAttribute("name");
|
|
||||||
const title = elem.querySelector(":scope > .postwidth .filetitle")?.innerText.trim();
|
|
||||||
const author = elem.querySelector(":scope > .postwidth .postername").innerText.trim();
|
|
||||||
const uid = elem.querySelector(":scope > .postwidth .uid").innerText.replace("ID: ", "");
|
|
||||||
const file = elem.querySelector(":scope > .postwidth > .filesize > a")?.href ?? undefined;
|
|
||||||
const postertrip = elem.querySelector(":scope > .postwidth .postertrip")?.innerText.trim();
|
|
||||||
const rawHtml = elem.querySelector(":scope > blockquote").innerHTML
|
|
||||||
.replace(`<div style="display:inline-block; width:400px;"></div><br>`,"")
|
|
||||||
.trim();
|
|
||||||
const date = [...elem.querySelector(":scope > .postwidth > label").childNodes]
|
|
||||||
.pop().data.trim()
|
|
||||||
.replace(
|
|
||||||
/(\d{4,4})\/(\d\d)\/(\d\d)\(\w+\)(\d\d):(\d\d)/,
|
|
||||||
"$1-$2-$3T$4:$5:00Z"
|
|
||||||
)
|
|
||||||
.replace(
|
|
||||||
/href=\\"\/kusaba\/questarch\/res\/\d+.html#\d+\\" onclick=\\"return highlight\('\d+', true\);\\"/,
|
|
||||||
""
|
|
||||||
);
|
|
||||||
|
|
||||||
const filenameRaw = elem.querySelector(":scope > .postwidth > .filesize")?.innerText;
|
|
||||||
const filename = filenameRaw?.includes("File ") ?
|
|
||||||
filenameRaw.match(/File \d+\.[^ ]+ - \([\d\.KMG]+B , \d+x\d+ , (.*) \)/)[1]
|
|
||||||
?? undefined : undefined;
|
|
||||||
const ret = {
|
|
||||||
id,
|
|
||||||
author,
|
|
||||||
uid,
|
|
||||||
rawHtml,
|
|
||||||
date
|
|
||||||
}
|
|
||||||
if (file) ret.file = file;
|
|
||||||
if (file) ret.filename = filename;
|
|
||||||
if (postertrip) ret.tripcode = postertrip;
|
|
||||||
if (title) ret.title = title;
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
const replies = [...document.getElementsByClassName("reply")];
|
|
||||||
replies.unshift(document.getElementById("delform"))
|
|
||||||
|
|
||||||
const processed = replies.map(elem => processReply(elem));
|
|
||||||
|
|
||||||
const blob = new Blob(
|
|
||||||
[JSON.stringify(processed, null, 4)],
|
|
||||||
{type : 'application/json'}
|
|
||||||
)
|
|
||||||
|
|
||||||
const a = document.createElement("a");
|
|
||||||
const url = URL.createObjectURL(blob);
|
|
||||||
a.href = url;
|
|
||||||
a.download = `thread_${processed[0].id}.json`;
|
|
||||||
document.body.appendChild(a);
|
|
||||||
a.click();
|
|
||||||
URL.revokeObjectURL(url);
|
|
||||||
a.remove();
|
|
||||||
|
|
||||||
|
|
||||||
return ;
|
|
||||||
|
|
||||||
})();
|
|
@ -1,6 +1,8 @@
|
|||||||
@namespace QuestReader
|
@namespace QuestReader
|
||||||
@using System
|
@using System
|
||||||
@using System.Linq
|
@using System.Linq
|
||||||
|
@using QuestReader.Models
|
||||||
|
@using QuestReader.Services
|
||||||
@inherits StandaloneTemplate<TemplateModel>
|
@inherits StandaloneTemplate<TemplateModel>
|
||||||
|
|
||||||
<!DOCTYPE html>
|
<!DOCTYPE html>
|
||||||
|
@ -18,6 +18,7 @@
|
|||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<PropertyGroup>
|
<PropertyGroup>
|
||||||
<VersionPrefix>1.0.1</VersionPrefix>
|
<VersionPrefix>1.0.1</VersionPrefix>
|
||||||
|
<RootNamespace>QuestReader</RootNamespace>
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
<Target Name="SetSourceRevisionId" BeforeTargets="InitializeSourceControlInformation">
|
<Target Name="SetSourceRevisionId" BeforeTargets="InitializeSourceControlInformation">
|
||||||
<Exec
|
<Exec
|
||||||
|
161
web/main.css
Normal file
161
web/main.css
Normal file
@ -0,0 +1,161 @@
|
|||||||
|
* {
|
||||||
|
box-sizing: border-box;
|
||||||
|
}
|
||||||
|
|
||||||
|
:root {
|
||||||
|
color-scheme: light dark;
|
||||||
|
}
|
||||||
|
|
||||||
|
body {
|
||||||
|
--bg-light: #f4f4f7;
|
||||||
|
--fg-light: #444;
|
||||||
|
--bg-gray: #383840;
|
||||||
|
--fg-gray: #aaa;
|
||||||
|
--bg-dark: #242427;
|
||||||
|
--fg-dark: #ddd;
|
||||||
|
--fg-muted: #888;
|
||||||
|
--highlight-blue-muted: #2F568480;
|
||||||
|
--highlight-blue: #2F5684;
|
||||||
|
--highlight-magenta: #D00FA0;
|
||||||
|
--highlight-blue-bright: #5F86F4;
|
||||||
|
--highlight-magenta-bright: #E058B0;
|
||||||
|
--highlight-magenta-semitransparent: #D00FA0B2;
|
||||||
|
--stripe-color: var(--highlight-blue);
|
||||||
|
font-family: "Helvetica", sans-serif;
|
||||||
|
line-height: 1.2em;
|
||||||
|
padding: 0;
|
||||||
|
margin: 0;
|
||||||
|
background: var(--bg-dark);
|
||||||
|
color: var(--fg-dark);
|
||||||
|
height: 100%;
|
||||||
|
width: 100%;
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
justify-content: center;
|
||||||
|
align-items: center;
|
||||||
|
}
|
||||||
|
|
||||||
|
.dark {
|
||||||
|
--bg-light: #242427;
|
||||||
|
--fg-light: #ddd;
|
||||||
|
--bg-gray: #18181A;
|
||||||
|
--fg-gray: #888;
|
||||||
|
--bg-dark: #101010;
|
||||||
|
--fg-dark: #c0c0c8;
|
||||||
|
--fg-muted: #888;
|
||||||
|
--highlight-blue-muted: #5F86F480;
|
||||||
|
--highlight-blue: #5F86F4;
|
||||||
|
--highlight-magenta: #E058B0;
|
||||||
|
}
|
||||||
|
|
||||||
|
@media (prefers-color-scheme: dark) {
|
||||||
|
body {
|
||||||
|
--bg-light: #242427;
|
||||||
|
--fg-light: #ddd;
|
||||||
|
--bg-gray: #18181A;
|
||||||
|
--fg-gray: #888;
|
||||||
|
--bg-dark: #101010;
|
||||||
|
--fg-dark: #c0c0c8;
|
||||||
|
--fg-muted: #888;
|
||||||
|
--highlight-blue-muted: #5F86F480;
|
||||||
|
--highlight-blue: #5F86F4;
|
||||||
|
--highlight-magenta: #E058B0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
main, header, footer {
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
max-width: 1000px;
|
||||||
|
width: 95%;
|
||||||
|
}
|
||||||
|
|
||||||
|
a {
|
||||||
|
color: var(--highlight-blue);
|
||||||
|
}
|
||||||
|
a:visited {
|
||||||
|
color: var(--highlight-blue);
|
||||||
|
}
|
||||||
|
|
||||||
|
.post {
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
margin-bottom: 1rem;
|
||||||
|
background: var(--bg-light);
|
||||||
|
color: var(--fg-light);
|
||||||
|
}
|
||||||
|
|
||||||
|
.post.suggestion-post {
|
||||||
|
--highlight-blue-muted: #5F86F480;
|
||||||
|
--highlight-blue: #5F86F4;
|
||||||
|
--highlight-magenta: #E058B0;
|
||||||
|
--fg-light: #aaa;
|
||||||
|
background-color: var(--bg-gray);
|
||||||
|
color: var(--fg-gray);
|
||||||
|
margin-left: 4em;
|
||||||
|
}
|
||||||
|
|
||||||
|
.post-self-title {
|
||||||
|
width: 95%;
|
||||||
|
align-self: center;
|
||||||
|
}
|
||||||
|
|
||||||
|
.post-header {
|
||||||
|
margin: 1rem 0;
|
||||||
|
font-size: 16px;
|
||||||
|
max-width: 600px;
|
||||||
|
width: 95%;
|
||||||
|
align-self: center;
|
||||||
|
color: var(--fg-muted);
|
||||||
|
}
|
||||||
|
|
||||||
|
.post-content {
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
}
|
||||||
|
|
||||||
|
.post-image {
|
||||||
|
margin: 0;
|
||||||
|
align-self: center;
|
||||||
|
max-width: 95%;
|
||||||
|
}
|
||||||
|
|
||||||
|
.post-image img {
|
||||||
|
max-width: 100%;
|
||||||
|
}
|
||||||
|
|
||||||
|
.post-text {
|
||||||
|
padding: 16px 40px;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
.chapter-announce {
|
||||||
|
width: 90%;
|
||||||
|
align-self: center;
|
||||||
|
}
|
||||||
|
|
||||||
|
a.post-anchor {
|
||||||
|
color: var(--fg-muted);
|
||||||
|
transition: color ease-in-out 0.2s;
|
||||||
|
}
|
||||||
|
a.post-anchor:hover {
|
||||||
|
color: var(--highlight-blue);
|
||||||
|
}
|
||||||
|
.post-anchor-mark {
|
||||||
|
color: var(--highlight-blue-muted);
|
||||||
|
transition: color ease-in-out 0.2s;
|
||||||
|
}
|
||||||
|
a.post-anchor:hover .post-anchor-mark {
|
||||||
|
color: var(--highlight-blue);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* In-chapter stuff */
|
||||||
|
|
||||||
|
.quoted-text {
|
||||||
|
color: var(--fg-muted);
|
||||||
|
}
|
||||||
|
|
||||||
|
.post-reference {
|
||||||
|
color: var(--highlight-blue);
|
||||||
|
}
|
38
web/main.ts
Normal file
38
web/main.ts
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
/**
 * Reports a "landmark" analytics event the first time each chapter announcement
 * (and the page footer) becomes fully visible.
 */
class VisitAnalytics {
    observer;

    constructor() {
        // Queueing stub: calls made before the analytics script loads are stored in plausible.q.
        window.plausible = window.plausible || function () { (window.plausible.q = window.plausible.q || []).push(arguments); };

        // DOMContentLoaded only fires while the document is still loading; in every
        // other state ("interactive" or "complete") the DOM is ready, so start at once.
        if (document.readyState === "loading")
            document.addEventListener("DOMContentLoaded", () => this.init(), false);
        else
            this.init();
    }

    /** Creates the intersection observer and registers every landmark element. */
    init() {
        const options = {
            root: null,
            rootMargin: "0px",
            threshold: 1.0
        };

        this.observer = new IntersectionObserver((entries, observer) => this.handleIntersect(entries, observer), options);

        document.querySelectorAll(".chapter-announce").forEach(elem => this.observer.observe(elem));

        // Not every page is guaranteed to have a footer; observe(null) would throw.
        const footer = document.querySelector("footer");
        if (footer)
            this.observer.observe(footer);

        console.log("Intersection observer ready");
    }

    /**
     * Sends one event per landmark that has become visible and stops observing it.
     * A chapter announcement is skipped (and stays observed) while the image in the
     * element following it has not finished loading.
     */
    handleIntersect(entries: IntersectionObserverEntry[], observer: IntersectionObserver) {
        entries.filter(e => e.isIntersecting).forEach(e => {
            const target = e.target;
            if (target.className == "chapter-announce") {
                const image = target.nextElementSibling?.querySelector("img");
                if (image && !image.complete)
                    return;
            }

            // Computed once: the old log line concatenated first and then applied ?:,
            // so it never printed the "Reached landmark" prefix or the tag name.
            const landmark = target.id ? target.id : target.tagName;
            window.plausible("landmark", {props: {id: landmark}});
            observer.unobserve(target);
            console.log("Reached landmark " + landmark);
        });
    }
}
|
||||||
|
new VisitAnalytics();
|
1
web/tsconfig.json
Normal file
1
web/tsconfig.json
Normal file
@ -0,0 +1 @@
|
|||||||
|
{}
|
Loading…
Reference in New Issue
Block a user