Add basic HTML parsing
This commit is contained in:
parent
387909b0c6
commit
a7250ceacf
6
.gitignore
vendored
6
.gitignore
vendored
@ -2,4 +2,8 @@
|
||||
/quests
|
||||
/rust
|
||||
/obj
|
||||
/bin
|
||||
/bin
|
||||
/cache
|
||||
omnisharp.json
|
||||
thread_*.json
|
||||
*.js
|
@ -1,6 +1,7 @@
|
||||
namespace QuestReader;
|
||||
namespace QuestReader.Models;
|
||||
|
||||
using System.Text.Json.Serialization;
|
||||
using QuestReader.Models.ParsedContent;
|
||||
|
||||
public record ThreadPost
|
||||
{
|
||||
@ -8,6 +9,7 @@ public record ThreadPost
|
||||
public string Author { get; set; }
|
||||
public string Uid { get; set; }
|
||||
public string RawHtml { get; set; }
|
||||
public ParsedContent.ParsedContent? ParsedContent { get; set; }
|
||||
public string? File { get; set; }
|
||||
public string? Filename { get; set; }
|
||||
public string? Title { get; set; }
|
||||
@ -42,29 +44,4 @@ public record ChapterMetadata
|
||||
public int Start { get; set; }
|
||||
public int? Announce { get; set; }
|
||||
public int End { get; set; }
|
||||
}
|
||||
|
||||
/// <summary>
/// Kind of value carried by a parameter.
/// NOTE(review): the consumers of this enum are not visible in this file — confirm usage before relying on the member meanings.
/// </summary>
public enum ParamType
{
    /// <summary>Zero value; what an unset <see cref="ParamType"/> defaults to.</summary>
    Invalid = 0,

    /// <summary>Presumably a post identifier — confirm against callers.</summary>
    PostId = 1,

    /// <summary>Presumably a poster unique ID — confirm against callers.</summary>
    UniqueId = 2,

    /// <summary>Presumably a poster name — confirm against callers.</summary>
    Username = 3,
}
|
||||
|
||||
/// <summary>
/// Outcome of handling a parameter.
/// NOTE(review): the consumers of this enum are not visible in this file — confirm usage.
/// </summary>
public enum ParamError
{
    /// <summary>Zero value; what an unset <see cref="ParamError"/> defaults to.</summary>
    Invalid = 0,

    /// <summary>No error occurred.</summary>
    NoError = 1,

    /// <summary>The requested item was not found.</summary>
    NotFound = 2,
}
|
||||
|
||||
/// <summary>
/// Data bag handed to the Razor template when a page is generated.
/// </summary>
public class TemplateModel
{
    /// <summary>Quest metadata; the generator fills this from the posts source.</summary>
    public Metadata Metadata { get; set; }

    /// <summary>Timestamp made available to the template (presumably generation time — confirm against the generator).</summary>
    public DateTime Now { get; set; }

    /// <summary>Posts to render; the generator assigns the accepted posts here.</summary>
    public List<ThreadPost> Posts { get; set; }

    /// <summary>Every loaded post (presumably including ones not accepted for rendering — confirm).</summary>
    public List<ThreadPost> AllPosts { get; set; }

    /// <summary>Base URL made available to the template.</summary>
    public string BaseUrl { get; set; }

    /// <summary>Version string of the generating tool.</summary>
    public string ToolVersion { get; set; }
}
|
123
Models/ParsedContent.cs
Normal file
123
Models/ParsedContent.cs
Normal file
@ -0,0 +1,123 @@
|
||||
using System.Linq.Expressions;
|
||||
using System.Runtime.Serialization;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using System.Web;
|
||||
|
||||
namespace QuestReader.Models.ParsedContent;
|
||||
|
||||
/// <summary>
/// Structured form of a post body: a format version plus the top-level content nodes.
/// </summary>
public class ParsedContent
{
    /// <summary>Version of the parser output format that produced <see cref="Nodes"/>.</summary>
    public Version Version { get; set; }

    /// <summary>Top-level nodes of the post body, in document order.</summary>
    public IList<ContentNode> Nodes { get; set; }
}
|
||||
|
||||
/// <summary>
/// JSON converter for <see cref="ContentNode"/> that writes each node using its runtime type,
/// so the properties declared on derived node classes are serialised as well.
/// </summary>
class ContentConverter : JsonConverter<ContentNode>
{
    /// <summary>Reading nodes back from JSON is not implemented.</summary>
    /// <exception cref="NotImplementedException">Always thrown.</exception>
    public override ContentNode Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
        => throw new NotImplementedException();

    /// <summary>Writes <paramref name="value"/> using its concrete type.</summary>
    /// <exception cref="InvalidDataContractException">
    /// <paramref name="value"/> is a <see cref="RootNode"/>, which must never reach serialisation.
    /// </exception>
    public override void Write(Utf8JsonWriter writer, ContentNode value, JsonSerializerOptions options)
    {
        if (value is null)
        {
            JsonSerializer.Serialize(writer, null as ContentNode, options);
            return;
        }

        if (value is RootNode)
            throw new InvalidDataContractException("RootNode must not be used");

        // Passing the runtime type makes the serialiser emit the derived class's members.
        var runtimeType = value.GetType();
        JsonSerializer.Serialize(writer, value, runtimeType, options);
    }
}
|
||||
|
||||
/// <summary>
/// Base class of every node in a parsed post body.
/// </summary>
[JsonConverter(typeof(ContentConverter))]
public abstract class ContentNode
{
    private const string TypeSuffix = "Node";

    /// <summary>
    /// Short type discriminator: the concrete class name without its trailing "Node"
    /// (e.g. <c>TextNode</c> gives <c>Text</c>).
    /// </summary>
    public string Type
    {
        get
        {
            // Strip only the suffix; a blanket Replace would also remove "Node" from the middle of a class name.
            var name = GetType().Name;
            return name.EndsWith(TypeSuffix, StringComparison.Ordinal)
                ? name[..^TypeSuffix.Length]
                : name;
        }
    }

    /// <summary>Renders the node as HTML. Node types that support rendering override this.</summary>
    /// <param name="model">Template model available to the renderer.</param>
    /// <exception cref="NotImplementedException">The concrete node type does not override rendering.</exception>
    public virtual string Render(TemplateModel model)
    {
        throw new NotImplementedException("Rendering is not supported for this node type");
    }
}
|
||||
|
||||
/// <summary>A run of plain (already HTML-decoded) text.</summary>
public class TextNode : ContentNode
{
    /// <summary>The text content.</summary>
    public string Text { get; set; }

    /// <summary>Debug representation: the text wrapped in double quotes.</summary>
    public override string ToString() => "\"" + Text + "\"";

    /// <summary>Renders the text HTML-encoded so it is safe to embed in markup.</summary>
    public override string Render(TemplateModel model)
    {
        return HttpUtility.HtmlEncode(Text);
    }
}
|
||||
|
||||
/// <summary>An explicit line break (parsed from a <c>br</c> element).</summary>
public class NewlineNode : ContentNode
{
    private const string LineBreak = "<br>";

    /// <summary>Debug representation of the line break.</summary>
    public override string ToString() => LineBreak;

    /// <summary>Renders the line break as a <c>br</c> tag.</summary>
    public override string Render(TemplateModel model)
    {
        return LineBreak;
    }
}
|
||||
|
||||
/// <summary>
/// A reference to another post. Rendering is not overridden here, so
/// <see cref="ContentNode.Render"/> throws for this node type.
/// </summary>
public class ReferenceNode : ContentNode
{
    /// <summary>Id of the referenced post.</summary>
    public int PostId { get; set; }

    /// <summary>Id of the thread that contains the referenced post.</summary>
    public int ThreadId { get; set; }

    /// <summary>Board the reference points at.</summary>
    public ReferenceType ReferenceType { get; set; }

    /// <summary>
    /// <c>true</c> when the reference was written as a full thread URL,
    /// <c>false</c> when it was a short <c>&gt;&gt;id</c> style link.
    /// </summary>
    public bool LongReference { get; set; }
}
|
||||
|
||||
/// <summary>Board a <see cref="ReferenceNode"/> points at.</summary>
public enum ReferenceType
{
    /// <summary>The <c>quest</c> board. Also the zero/default value.</summary>
    QuestActive = 0,

    /// <summary>The <c>questarch</c> board.</summary>
    QuestArchive = 1,

    /// <summary>The <c>questdis</c> board.</summary>
    QuestDiscussion = 2,
}
|
||||
|
||||
/// <summary>Base class for nodes that wrap child nodes (formatting spans, links, ...).</summary>
public abstract class ContainerNode : ContentNode
{
    /// <summary>Child nodes in document order. Assigned by the parser after the node is created.</summary>
    public IList<ContentNode> Nodes { get; set; }

    /// <summary>
    /// Debug representation: the node type followed by its children, one per line.
    /// A container whose <see cref="Nodes"/> was never assigned prints as empty instead of throwing.
    /// </summary>
    public override string ToString() =>
        $"{Type} [ {string.Join(",\n", Nodes ?? Array.Empty<ContentNode>())} ]";
}
|
||||
|
||||
/// <summary>
/// Temporary container used while recursively parsing a document node; its children are
/// lifted out once parsing finishes. It MUST NOT be used as a regular content node:
/// printing or rendering it throws.
/// </summary>
public class RootNode : ContainerNode
{
    /// <exception cref="InvalidDataContractException">Always thrown.</exception>
    public override string ToString()
    {
        throw new InvalidDataContractException("RootNode must not be used");
    }

    /// <exception cref="InvalidDataContractException">Always thrown.</exception>
    public override string Render(TemplateModel model)
    {
        throw new InvalidDataContractException("RootNode must not be used");
    }
}
|
||||
|
||||
/// <summary>Quoted text (parsed from <c>span.unkfunc</c>).</summary>
public class QuoteNode : ContainerNode
{
}

/// <summary>Bold text (parsed from <c>b</c>).</summary>
public class BoldNode : ContainerNode
{
}

/// <summary>Italic text (parsed from <c>i</c>).</summary>
public class ItalicsNode : ContainerNode
{
}

/// <summary>Struck-out text (parsed from <c>strike</c>).</summary>
public class StrikeoutNode : ContainerNode
{
}

/// <summary>Spoilered text (parsed from <c>span.spoiler</c>).</summary>
public class SpoilerNode : ContainerNode
{
}

/// <summary>Monospaced code (parsed from a <c>div</c> with the board's monospace inline style).</summary>
public class InlineCodeNode : ContainerNode
{
}

/// <summary>Underlined text (parsed from a <c>span</c> with a bottom-border inline style).</summary>
public class UnderlineNode : ContainerNode
{
}
|
||||
|
||||
/// <summary>A hyperlink that is not a recognised post reference; its children hold the link text.</summary>
public class ExternalLinkNode : ContainerNode
{
    /// <summary>
    /// Link target taken from the anchor's <c>href</c> attribute
    /// (the parser stores the literal <c>"ERROR"</c> when the attribute is missing).
    /// </summary>
    public string Destination { get; set; }
}
|
11
Models/Template.cs
Normal file
11
Models/Template.cs
Normal file
@ -0,0 +1,11 @@
|
||||
namespace QuestReader.Models;
|
||||
|
||||
/// <summary>
/// Data bag handed to the Razor template when a page is generated.
/// </summary>
public class TemplateModel
{
    /// <summary>Quest metadata; the generator fills this from the posts source.</summary>
    public Metadata Metadata { get; set; }

    /// <summary>Timestamp made available to the template (presumably generation time — confirm against the generator).</summary>
    public DateTime Now { get; set; }

    /// <summary>Posts to render; the generator assigns the accepted posts here.</summary>
    public List<ThreadPost> Posts { get; set; }

    /// <summary>Every loaded post (presumably including ones not accepted for rendering — confirm).</summary>
    public List<ThreadPost> AllPosts { get; set; }

    /// <summary>Base URL made available to the template.</summary>
    public string BaseUrl { get; set; }

    /// <summary>Version string of the generating tool.</summary>
    public string ToolVersion { get; set; }
}
|
@ -1,6 +1,7 @@
|
||||
namespace QuestReader.Services;
|
||||
|
||||
using System.Reflection;
|
||||
using QuestReader.Models;
|
||||
|
||||
public class Generator
|
||||
{
|
||||
@ -21,7 +22,8 @@ public class Generator
|
||||
|
||||
var chapterAnnounces = PostsSource.Metadata.Chapters.Select(c => c.Announce ?? c.Start);
|
||||
|
||||
PostsSource.Accepted.Where(p => chapterAnnounces.Contains(p.Id)).ToList().ForEach(p => {
|
||||
PostsSource.Accepted.Where(p => chapterAnnounces.Contains(p.Id)).ToList().ForEach(p =>
|
||||
{
|
||||
p.IsChapterAnnounce = true;
|
||||
p.Chapter = PostsSource.Metadata.Chapters.Single(c => (c.Announce ?? c.Start) == p.Id);
|
||||
});
|
||||
@ -38,7 +40,7 @@ public class Generator
|
||||
|
||||
public string Run()
|
||||
{
|
||||
RazorTemplate.Model = new TemplateModel
|
||||
RazorTemplate.Model = new TemplateModel
|
||||
{
|
||||
Metadata = PostsSource.Metadata,
|
||||
Posts = PostsSource.Accepted,
|
||||
|
@ -3,6 +3,7 @@ namespace QuestReader.Services;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using System.Text.RegularExpressions;
|
||||
using QuestReader.Models;
|
||||
|
||||
public class PostsSource
|
||||
{
|
||||
@ -27,7 +28,8 @@ public class PostsSource
|
||||
fileStream.Dispose();
|
||||
|
||||
Console.Out.WriteLine($"Loaded metadata: {Metadata}");
|
||||
Posts = Metadata.Threads.SelectMany(tId => {
|
||||
Posts = Metadata.Threads.SelectMany(tId =>
|
||||
{
|
||||
using var fileStream = File.OpenRead(Path.Combine(basePath, $"thread_{tId}.json"));
|
||||
var threadData = JsonSerializer.Deserialize<List<ThreadPost>>(fileStream, options)
|
||||
?? throw new InvalidDataException("Empty deserialisation result for thread data");
|
||||
|
269
Services/QuestdenParse.cs
Normal file
269
Services/QuestdenParse.cs
Normal file
@ -0,0 +1,269 @@
|
||||
using System.Globalization;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using System.Text.RegularExpressions;
|
||||
using System.Web;
|
||||
using HtmlAgilityPack;
|
||||
using QuestReader.Models;
|
||||
using QuestReader.Models.ParsedContent;
|
||||
|
||||
namespace QuestReader.Services;
|
||||
|
||||
public class QuestdenParse
|
||||
{
|
||||
/// <summary>Format version stamped into every <c>ParsedContent</c> this parser produces.</summary>
static readonly Version LatestCompatibleVersion = new(1, 0, 2);

/// <summary>
/// Matches the CSS class of a short post reference: <c>ref|board|threadId|postId</c>.
/// Groups: 1 = board, 2 = thread id, 3 = post id.
/// </summary>
static Regex RefRegex { get; } = new Regex(@"^ref\|(questarch|questdis|quest)\|(\d+)\|(\d+)$", RegexOptions.Compiled);

/// <summary>
/// Matches a full thread URL on tgchan/questden.
/// Groups: 1 = optional "www.", 2 = site, 3 = board, 4 = thread id, 5 = optional post id.
/// Literal dots are escaped so that e.g. "questdenXorg" is not accepted.
/// </summary>
static Regex LongRefRegex { get; } = new Regex(@"(?:https?://)?(www\.)?(tgchan|questden)\.org/kusaba/(questarch|questdis|quest)/res/(\d+)\.html#?i?(\d+)?$", RegexOptions.Compiled);

/// <summary>
/// Matches the board's post timestamp, e.g. <c>2012/03/04(Sun)05:06</c>.
/// Groups: 1 = year, 2 = month, 3 = day, 4 = hour, 5 = minute.
/// </summary>
static Regex DateRegex { get; } = new Regex(@"(\d{4,4})\/(\d\d)\/(\d\d)\(\w+\)(\d\d):(\d\d)", RegexOptions.Compiled);

/// <summary>
/// Matches the file info line of a post; group 1 is the original file name.
/// </summary>
static Regex FilenameRegex { get; } = new Regex(@"File \d+\.[^ ]+ - \([\d\.KMG]+B , \d+x\d+ , (.*) \)", RegexOptions.Compiled);
|
||||
|
||||
/// <summary>Single HTTP client reused for every download instead of creating (and leaking) one per call.</summary>
static readonly HttpClient SharedHttpClient = new();

/// <summary>
/// Downloads (or loads from the local <c>cache</c> directory) the HTML of a thread, parses every
/// post in it and writes the result to <c>thread_{threadId}.json</c> in the working directory.
/// Does nothing when that JSON file already exists.
/// </summary>
/// <param name="threadId">Id of the thread on the quest board.</param>
public static async Task GetThread(int threadId)
{
    // Check first so an already-exported thread costs neither I/O nor allocations.
    var outputFile = $"thread_{threadId}.json";
    if (File.Exists(outputFile))
        return;

    var url = $"http://questden.org/kusaba/quest/res/{threadId}.html";
    var cacheFile = $"cache/QuestDen-{threadId}.html";

    string html;
    if (File.Exists(cacheFile))
    {
        html = await File.ReadAllTextAsync(cacheFile);
    }
    else
    {
        html = await SharedHttpClient.GetStringAsync(url);
        // CreateDirectory is a no-op when the directory already exists.
        Directory.CreateDirectory("cache");
        await File.WriteAllTextAsync(cacheFile, html);
    }

    // OptionEmptyCollection makes SelectNodes return an empty collection instead of null,
    // which the optional-field lookups in ParsePost rely on.
    var doc = new HtmlDocument { OptionEmptyCollection = true };
    doc.LoadHtml(html);

    // The opening post lives in form#delform, replies carry class "reply".
    var posts = doc.DocumentNode
        .SelectNodes(".//*[@class='reply']|.//form[@id='delform']")
        .Select(node => ParsePost(node, threadId))
        .ToList();

    var options = new JsonSerializerOptions
    {
        PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        WriteIndented = true
    };
    await File.WriteAllTextAsync(outputFile, JsonSerializer.Serialize(posts, options));
}
|
||||
/// <summary>Parses a single post from its outer HTML.</summary>
/// <param name="postHtml">HTML whose first element is the post container.</param>
/// <param name="threadId">Id of the thread the post belongs to.</param>
/// <returns>The parsed post.</returns>
/// <exception cref="InvalidDataException"><paramref name="postHtml"/> contains no element.</exception>
public static ThreadPost ParsePost(string postHtml, int threadId)
{
    // Without OptionEmptyCollection, SelectNodes returns null for posts that lack an optional
    // part (title, file, tripcode) and the lookups in the node-based overload would throw.
    var htmlDoc = new HtmlDocument { OptionEmptyCollection = true };
    htmlDoc.LoadHtml(postHtml);

    // Skip leading whitespace/comment nodes: FirstChild is a text node when the markup starts with a newline.
    var postNode = htmlDoc.DocumentNode.ChildNodes
        .FirstOrDefault(child => child.NodeType == HtmlNodeType.Element)
        ?? throw new InvalidDataException("Post HTML contains no element to parse");

    return ParsePost(postNode, threadId);
}
|
||||
|
||||
/// <summary>
/// Extracts id, title, author, uid, date, attachment, tripcode and body from a post container node
/// and parses the body into structured content.
/// </summary>
/// <param name="postNode">The post container (a reply element or the thread form).</param>
/// <param name="threadId">Id of the thread the post belongs to.</param>
/// <returns>The populated post.</returns>
/// <exception cref="InvalidOperationException">A mandatory part of the post is missing or duplicated.</exception>
/// <exception cref="FormatException">The post id or date cannot be parsed.</exception>
/// <exception cref="InvalidDataException">The post body contains markup the content parser does not know.</exception>
public static ThreadPost ParsePost(HtmlNode postNode, int threadId)
{
    // SelectNodes returns null (not an empty collection) unless the owning document enables
    // OptionEmptyCollection; normalise so this method works with either kind of document.
    IEnumerable<HtmlNode> Select(string xpath) =>
        (IEnumerable<HtmlNode>?)postNode.SelectNodes(xpath) ?? Enumerable.Empty<HtmlNode>();

    var post = new ThreadPost();

    var id = Select("./div[@class='postwidth']/a[@name!='s']")
        .Single()
        .Attributes["name"].Value.Trim();
    // The XPath already excludes the "s" anchor; the check is kept as a defensive fallback.
    post.Id = id == "s" ? threadId : int.Parse(id, CultureInfo.InvariantCulture);

    post.Title = Select("./div[@class='postwidth']//*[@class='filetitle']")
        .SingleOrDefault()
        ?.InnerText.Trim();

    post.Author = Select("./div[@class='postwidth']//*[@class='postername']")
        .Single()
        .InnerText.Trim();

    post.Uid = Select("./div[@class='postwidth']//*[@class='uid']")
        .Single()
        .InnerText.Trim()
        .Replace("ID: ", "", true, CultureInfo.InvariantCulture);

    // The timestamp is the last text node of the label; rewrite it to ISO-8601 before parsing.
    var rawDate = Select("./div[@class='postwidth']/label/text()[last()]")
        .Single()
        .InnerText.Trim();
    post.Date = DateTime.Parse(
        DateRegex.Replace(rawDate, "$1-$2-$3T$4:$5"),
        CultureInfo.InvariantCulture, // machine-readable input: never depend on the current culture
        DateTimeStyles.AssumeUniversal | DateTimeStyles.AdjustToUniversal
    );

    post.File = Select("./div[@class='postwidth']//*[@class='filesize']/a")
        .SingleOrDefault()
        ?.Attributes["href"].Value.Trim();

    var filenameRaw = Select("./div[@class='postwidth']//*[@class='filesize']")
        .SingleOrDefault()
        ?.InnerText.Trim();
    if (filenameRaw is not null)
    {
        filenameRaw = Regex.Replace(filenameRaw, @"\s*\n\s*", " ", RegexOptions.Multiline);
        // Regex.Match never returns null; a failed match has an empty group value,
        // so test Success to report "no filename" as null rather than "".
        var filenameMatch = FilenameRegex.Match(filenameRaw);
        post.Filename = filenameRaw.Contains("File ") && filenameMatch.Success
            ? filenameMatch.Groups[1].Value
            : null;
    }

    post.Tripcode = Select("./div[@class='postwidth']//*[@class='postertrip']")
        .SingleOrDefault()
        ?.InnerText.Trim();

    // Body: drop the layout spacer div the board injects and a trailing <br>.
    post.RawHtml = Regex.Replace(
        Select("./blockquote")
            .Single()
            .InnerHtml
            .Replace("\r", " ")
            .Replace(@"<div style=""display:inline-block; width:400px;""></div><br>", "")
            .Trim(),
        @"\s*<br\s*\/?>$",
        ""
    );

    try
    {
        post.ParsedContent = ParseContent(post.RawHtml);
    }
    catch (Exception ex) when (ex is FormatException or InvalidDataException)
    {
        // Print the offending post for diagnosis, then let the failure propagate unchanged.
        Console.WriteLine($"\n{post.Id} {post.RawHtml.Replace("\r", "")}\n");
        throw;
    }

    return post;
}
|
||||
|
||||
/// <summary>Parses the HTML of a post body into a tree of content nodes.</summary>
/// <param name="postHtml">Inner HTML of the post body.</param>
/// <returns>The top-level nodes together with the parser format version.</returns>
/// <exception cref="Exception">The recursive parse did not produce a root container.</exception>
public static ParsedContent ParseContent(string postHtml)
{
    var document = new HtmlDocument();
    document.LoadHtml(postHtml);

    // The document node always maps to the temporary RootNode; only its children are kept.
    if (RecursiveParse(document.DocumentNode) is not RootNode root)
        throw new Exception("Parsing returned a non-RootNode root");

    var parsed = new ParsedContent();
    parsed.Version = LatestCompatibleVersion;
    parsed.Nodes = root.Nodes;
    return parsed;
}
|
||||
|
||||
/// <summary>Maps a board name as it appears in references to its <see cref="ReferenceType"/>.</summary>
private static ReferenceType BoardToReferenceType(string board) => board switch
{
    "quest" => ReferenceType.QuestActive,
    "questarch" => ReferenceType.QuestArchive,
    "questdis" => ReferenceType.QuestDiscussion,
    _ => throw new InvalidDataException(""),
};

/// <summary>
/// Converts an HTML node (and, for containers, its children) into a content node.
/// </summary>
/// <param name="node">HTML node to convert.</param>
/// <param name="parentNode">Content node being filled by the caller; currently not consulted.</param>
/// <returns>The content node; the document node yields a temporary <see cref="RootNode"/>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="node"/> is null.</exception>
/// <exception cref="InvalidDataException">The node is of a kind or shape the parser does not know.</exception>
private static ContentNode RecursiveParse(HtmlNode node, ContentNode? parentNode = null)
{
    if (node is null)
        throw new ArgumentNullException(nameof(node), "Html node is null");

    // NOTE(review): trimming drops whitespace next to inline elements ("a <b>b</b>" loses the space) — confirm this is intended.
    if (node is HtmlTextNode htmlText)
        return new TextNode { Text = HttpUtility.HtmlDecode(htmlText.Text.Trim()) };

    if (node.NodeType is not (HtmlNodeType.Document or HtmlNodeType.Element))
        throw new InvalidDataException("Unsupported HTML node type");

    ContentNode outNode = node.Name.ToLowerInvariant() switch
    {
        // Short reference: <a class="ref|board|thread|post">>>post</a>
        "a" when
            node.GetClasses().Count() == 1
            && node.FirstChild?.NodeType == HtmlNodeType.Text
            && node.Descendants().Count() == 1
            && RefRegex.Match(node.GetClasses().Single()) is { Success: true } shortRef
            && HttpUtility.HtmlDecode(node.FirstChild?.InnerText) is string linkText
            && (linkText == $">>{shortRef.Groups[3].Value}"
                || linkText == $">>/{shortRef.Groups[1].Value}/{shortRef.Groups[3].Value}")
            => new ReferenceNode
            {
                PostId = int.Parse(shortRef.Groups[3].Value, CultureInfo.InvariantCulture),
                ThreadId = int.Parse(shortRef.Groups[2].Value, CultureInfo.InvariantCulture),
                ReferenceType = BoardToReferenceType(shortRef.Groups[1].Value),
                LongReference = false
            },

        // Long reference: a bare link whose text equals its href and is a thread URL.
        "a" when
            !node.GetClasses().Any()
            && node.FirstChild is HtmlTextNode onlyChild
            && node.Descendants().Count() == 1
            && HttpUtility.HtmlDecode(onlyChild.Text) is var hrefText
            && node.GetAttributeValue("href", "ERROR") == hrefText
            && LongRefRegex.Match(hrefText) is { Success: true } longRef
            => new ReferenceNode
            {
                // Without a post anchor the link targets the opening post, whose id equals the thread id.
                PostId = int.Parse(
                    longRef.Groups[5].Success ? longRef.Groups[5].Value : longRef.Groups[4].Value,
                    CultureInfo.InvariantCulture),
                ThreadId = int.Parse(longRef.Groups[4].Value, CultureInfo.InvariantCulture),
                // Group 3 is the board; leaving this unset would label every long reference QuestActive.
                ReferenceType = BoardToReferenceType(longRef.Groups[3].Value),
                LongReference = true
            },

        "a" when !node.GetClasses().Any()
            => new ExternalLinkNode { Destination = node.GetAttributeValue("href", "ERROR") },

        "br" => new NewlineNode(),
        "#document" => new RootNode(),
        "i" => new ItalicsNode(),
        "b" => new BoldNode(),
        "strike" => new StrikeoutNode(),

        "span" when
            node.GetClasses() is var spoilerClasses
            && spoilerClasses.Count() == 1
            && spoilerClasses.Single() == "spoiler"
            => new SpoilerNode(),

        "span" when
            node.GetClasses() is var quoteClasses
            && quoteClasses.Count() == 1
            && quoteClasses.Single() == "unkfunc"
            => new QuoteNode(),

        "span" when
            node.GetAttributes() is var spanAttributes
            && spanAttributes.Count() == 1
            && spanAttributes.Single() is var spanStyle
            && spanStyle.Name == "style"
            && spanStyle.DeEntitizeValue == @"border-bottom: 1px solid"
            => new UnderlineNode(),

        // Embedded YouTube player: a span whose only non-blank descendant is the iframe.
        "span" when
            node.Descendants()
                .Where(d => d is not HtmlTextNode blank || !string.IsNullOrWhiteSpace(blank.Text))
                .ToList() is { Count: 1 } meaningful
            && meaningful[0] is { Name: "iframe" } frame
            // Default to "" so an iframe without src falls through instead of throwing a null dereference.
            && frame.GetAttributeValue("src", "").Contains("youtube")
            => new TextNode { Text = $"Here be youtube link {frame.GetAttributeValue("src", "")}" },

        "div" when
            node.GetAttributes() is var divAttributes
            && divAttributes.Count() == 1
            && divAttributes.Single() is var divStyle
            && divStyle.Name == "style"
            && divStyle.DeEntitizeValue == @"white-space: pre-wrap !important; font-family: monospace, monospace !important;"
            => new InlineCodeNode(),

        _ => throw new InvalidDataException($"Unknown node parse attempt: {node.Name} #{node.Id} .{string.Join(".", node.GetClasses())}\n{node.OuterHtml}")
    };

    if (outNode is ContainerNode container)
    {
        // Parse children recursively and drop text nodes that are empty after trimming.
        container.Nodes = node.ChildNodes
            .Select(child => RecursiveParse(child, container))
            .Where(parsed => parsed is not TextNode text || !string.IsNullOrWhiteSpace(text.Text))
            .ToList();
    }

    return outNode;
}
|
||||
}
|
@ -1,3 +1,5 @@
|
||||
namespace QuestReader.Services;
|
||||
|
||||
using System.Reflection;
|
||||
using System.Text;
|
||||
using Microsoft.AspNetCore.Razor.Language;
|
||||
@ -6,8 +8,6 @@ using Microsoft.CodeAnalysis;
|
||||
using Microsoft.CodeAnalysis.CSharp;
|
||||
using Microsoft.CodeAnalysis.Emit;
|
||||
|
||||
namespace QuestReader;
|
||||
|
||||
public class RazorStandalone<TTemplate>
|
||||
{
|
||||
RazorProjectEngine Engine { get; set; }
|
||||
@ -63,7 +63,7 @@ public class RazorStandalone<TTemplate>
|
||||
}
|
||||
|
||||
var asm = Assembly.Load(memoryStream.ToArray());
|
||||
var templateInstance = (TTemplate?) Activator.CreateInstance(asm.GetType("QuestReader.Template"));
|
||||
var templateInstance = (TTemplate?)Activator.CreateInstance(asm.GetType("QuestReader.Template"));
|
||||
if (templateInstance is null)
|
||||
throw new Exception("Template is null");
|
||||
|
||||
@ -127,7 +127,7 @@ public abstract class StandaloneTemplate<TModel>
|
||||
await Output.WriteAsync(literal);
|
||||
}
|
||||
|
||||
string? Suffix {get;set;}
|
||||
string? Suffix { get; set; }
|
||||
|
||||
public async Task BeginWriteAttributeAsync(
|
||||
string name,
|
||||
@ -145,7 +145,8 @@ public abstract class StandaloneTemplate<TModel>
|
||||
await WriteAsync(value);
|
||||
}
|
||||
|
||||
public async Task EndWriteAttributeAsync() {
|
||||
public async Task EndWriteAttributeAsync()
|
||||
{
|
||||
await WriteLiteralAsync(Suffix!);
|
||||
Suffix = null;
|
||||
}
|
65
kusaba.js
65
kusaba.js
@ -1,65 +0,0 @@
|
||||
// Browser-console scraper: collects every post of the currently open kusaba thread
// and downloads the result as thread_<id>.json.
await (async () => {
    // Some pages patch Array.prototype.toJSON, which would corrupt JSON.stringify output.
    delete Array.prototype.toJSON;

    const FILE_INFO = /File \d+\.[^ ]+ - \([\d\.KMG]+B , \d+x\d+ , (.*) \)/;

    // Extracts the fields of one post container element into a plain object.
    const processReply = (elem) => {
        const id = +elem.querySelector(":scope > .postwidth > a[name]:not([name=s])").getAttribute("name");
        const title = elem.querySelector(":scope > .postwidth .filetitle")?.innerText.trim();
        const author = elem.querySelector(":scope > .postwidth .postername").innerText.trim();
        const uid = elem.querySelector(":scope > .postwidth .uid").innerText.replace("ID: ", "");
        const file = elem.querySelector(":scope > .postwidth > .filesize > a")?.href ?? undefined;
        const postertrip = elem.querySelector(":scope > .postwidth .postertrip")?.innerText.trim();
        const rawHtml = elem.querySelector(":scope > blockquote").innerHTML
            .replace(`<div style="display:inline-block; width:400px;"></div><br>`, "")
            .trim();

        // The timestamp is the last child node of the label; rewrite it to ISO-8601 (UTC).
        const date = [...elem.querySelector(":scope > .postwidth > label").childNodes]
            .pop().data.trim()
            .replace(
                /(\d{4,4})\/(\d\d)\/(\d\d)\(\w+\)(\d\d):(\d\d)/,
                "$1-$2-$3T$4:$5:00Z"
            )
            // NOTE(review): this pattern targets anchor markup and looks like a leftover on a date string — confirm before removing.
            .replace(
                /href=\\"\/kusaba\/questarch\/res\/\d+.html#\d+\\" onclick=\\"return highlight\('\d+', true\);\\"/,
                ""
            );

        const filenameRaw = elem.querySelector(":scope > .postwidth > .filesize")?.innerText;
        // match() returns null when the info line has an unexpected shape; ?.[1] avoids throwing on it.
        const filename = filenameRaw?.includes("File ")
            ? filenameRaw.match(FILE_INFO)?.[1] ?? undefined
            : undefined;

        const ret = {
            id,
            author,
            uid,
            rawHtml,
            date
        };
        // Optional fields are only added when present so they are omitted from the JSON.
        if (file) ret.file = file;
        if (file) ret.filename = filename;
        if (postertrip) ret.tripcode = postertrip;
        if (title) ret.title = title;
        return ret;
    };

    // The opening post lives in #delform; put it in front of the replies.
    const replies = [...document.getElementsByClassName("reply")];
    replies.unshift(document.getElementById("delform"));

    const processed = replies.map(elem => processReply(elem));

    const blob = new Blob(
        [JSON.stringify(processed, null, 4)],
        {type : 'application/json'}
    );

    // Trigger the download through a temporary anchor element.
    const a = document.createElement("a");
    const url = URL.createObjectURL(blob);
    a.href = url;
    a.download = `thread_${processed[0].id}.json`;
    document.body.appendChild(a);
    a.click();
    URL.revokeObjectURL(url);
    a.remove();

    return ;
})();
|
@ -1,6 +1,8 @@
|
||||
@namespace QuestReader
|
||||
@using System
|
||||
@using System.Linq
|
||||
@using QuestReader.Models
|
||||
@using QuestReader.Services
|
||||
@inherits StandaloneTemplate<TemplateModel>
|
||||
|
||||
<!DOCTYPE html>
|
||||
|
@ -18,6 +18,7 @@
|
||||
</ItemGroup>
|
||||
<PropertyGroup>
|
||||
<VersionPrefix>1.0.1</VersionPrefix>
|
||||
<RootNamespace>QuestReader</RootNamespace>
|
||||
</PropertyGroup>
|
||||
<Target Name="SetSourceRevisionId" BeforeTargets="InitializeSourceControlInformation">
|
||||
<Exec
|
||||
|
161
web/main.css
Normal file
161
web/main.css
Normal file
@ -0,0 +1,161 @@
|
||||
/* ---- Global reset ---- */

* {
    box-sizing: border-box;
}

:root {
    color-scheme: light dark;
}

/* ---- Page shell and default (light) palette ---- */

body {
    --bg-light: #f4f4f7;
    --fg-light: #444;
    --bg-gray: #383840;
    --fg-gray: #aaa;
    --bg-dark: #242427;
    --fg-dark: #ddd;
    --fg-muted: #888;
    --highlight-blue-muted: #2F568480;
    --highlight-blue: #2F5684;
    --highlight-magenta: #D00FA0;
    --highlight-blue-bright: #5F86F4;
    --highlight-magenta-bright: #E058B0;
    --highlight-magenta-semitransparent: #D00FA0B2;
    --stripe-color: var(--highlight-blue);
    font-family: "Helvetica", sans-serif;
    line-height: 1.2em;
    padding: 0;
    margin: 0;
    background: var(--bg-dark);
    color: var(--fg-dark);
    height: 100%;
    width: 100%;
    display: flex;
    flex-direction: column;
    justify-content: center;
    align-items: center;
}

/* ---- Dark palette: opt-in via the .dark class ... ---- */

.dark {
    --bg-light: #242427;
    --fg-light: #ddd;
    --bg-gray: #18181A;
    --fg-gray: #888;
    --bg-dark: #101010;
    --fg-dark: #c0c0c8;
    --fg-muted: #888;
    --highlight-blue-muted: #5F86F480;
    --highlight-blue: #5F86F4;
    --highlight-magenta: #E058B0;
}

/* ---- ... or automatic via the system preference (same values as .dark) ---- */

@media (prefers-color-scheme: dark) {
    body {
        --bg-light: #242427;
        --fg-light: #ddd;
        --bg-gray: #18181A;
        --fg-gray: #888;
        --bg-dark: #101010;
        --fg-dark: #c0c0c8;
        --fg-muted: #888;
        --highlight-blue-muted: #5F86F480;
        --highlight-blue: #5F86F4;
        --highlight-magenta: #E058B0;
    }
}

/* ---- Layout columns ---- */

main, header, footer {
    display: flex;
    flex-direction: column;
    max-width: 1000px;
    width: 95%;
}

/* ---- Links (visited links keep the normal link colour) ---- */

a {
    color: var(--highlight-blue);
}

a:visited {
    color: var(--highlight-blue);
}

/* ---- Posts ---- */

.post {
    display: flex;
    flex-direction: column;
    margin-bottom: 1rem;
    background: var(--bg-light);
    color: var(--fg-light);
}

/* Suggestion posts sit on the gray background, so they always use the bright highlight colours. */
.post.suggestion-post {
    --highlight-blue-muted: #5F86F480;
    --highlight-blue: #5F86F4;
    --highlight-magenta: #E058B0;
    --fg-light: #aaa;
    background-color: var(--bg-gray);
    color: var(--fg-gray);
    margin-left: 4em;
}

.post-self-title {
    width: 95%;
    align-self: center;
}

.post-header {
    margin: 1rem 0;
    font-size: 16px;
    max-width: 600px;
    width: 95%;
    align-self: center;
    color: var(--fg-muted);
}

.post-content {
    display: flex;
    flex-direction: column;
}

.post-image {
    margin: 0;
    align-self: center;
    max-width: 95%;
}

.post-image img {
    max-width: 100%;
}

.post-text {
    padding: 16px 40px;
}

/* ---- Chapter markers and post anchors ---- */

.chapter-announce {
    width: 90%;
    align-self: center;
}

a.post-anchor {
    color: var(--fg-muted);
    transition: color ease-in-out 0.2s;
}

a.post-anchor:hover {
    color: var(--highlight-blue);
}

.post-anchor-mark {
    color: var(--highlight-blue-muted);
    transition: color ease-in-out 0.2s;
}

a.post-anchor:hover .post-anchor-mark {
    color: var(--highlight-blue);
}

/* In-chapter stuff */

.quoted-text {
    color: var(--fg-muted);
}

.post-reference {
    color: var(--highlight-blue);
}
|
38
web/main.ts
Normal file
38
web/main.ts
Normal file
@ -0,0 +1,38 @@
|
||||
/**
 * Reports reading progress to Plausible: fires a "landmark" event the first time each
 * chapter announcement (and the footer) becomes fully visible.
 */
class VisitAnalytics {
    observer;

    constructor() {
        // Standard Plausible stub: queue calls made before the real script has loaded.
        window.plausible = window.plausible || function () { (window.plausible.q = window.plausible.q || []).push(arguments); };
        // Any state other than "loading" means DOMContentLoaded has already fired (or is firing),
        // so waiting for the event would never run init() when the script executes late.
        if (document.readyState !== "loading")
            this.init();
        else
            document.addEventListener("DOMContentLoaded", () => this.init(), false);
    }

    /** Creates the intersection observer and registers every landmark element with it. */
    init() {
        const options = {
            root: null,
            rootMargin: "0px",
            threshold: 1.0 // only count a landmark once it is entirely on screen
        };

        this.observer = new IntersectionObserver((entries, observer) => this.handleIntersect(entries, observer), options);

        document.querySelectorAll(".chapter-announce")
            .forEach(elem => this.observer.observe(elem));

        // A page without a footer must not break the chapter tracking.
        const footer = document.querySelector("footer");
        if (footer)
            this.observer.observe(footer);

        console.log("Intersection observer ready");
    }

    /**
     * Reports each newly visible landmark once and stops observing it.
     * @param entries  intersection changes delivered by the observer
     * @param observer the observer that delivered them
     */
    handleIntersect(entries: IntersectionObserverEntry[], observer: IntersectionObserver) {
        for (const entry of entries) {
            if (!entry.isIntersecting)
                continue;

            const target = entry.target;
            if (target.className == "chapter-announce") {
                // Skip while the chapter's image is still loading; a missing sibling or image is treated as loaded.
                const image = target.nextElementSibling?.querySelector("img");
                if (image && !image.complete)
                    continue;
            }

            const landmark = target.id ? target.id : target.tagName;
            window.plausible("landmark", {props: {id: landmark}});
            observer.unobserve(target);
            // Computed up front: `"text" + id ? a : b` parses as `("text" + id) ? a : b` and dropped the prefix.
            console.log("Reached landmark " + landmark);
        }
    }
}

new VisitAnalytics();
|
1
web/tsconfig.json
Normal file
1
web/tsconfig.json
Normal file
@ -0,0 +1 @@
|
||||
{}
|
Loading…
Reference in New Issue
Block a user