Get parsing working, and HTML generation from resulting objects
This commit is contained in:
parent
a7250ceacf
commit
38f317afd3
1
.gitignore
vendored
1
.gitignore
vendored
@ -4,6 +4,7 @@
|
|||||||
/obj
|
/obj
|
||||||
/bin
|
/bin
|
||||||
/cache
|
/cache
|
||||||
|
output.html
|
||||||
omnisharp.json
|
omnisharp.json
|
||||||
thread_*.json
|
thread_*.json
|
||||||
*.js
|
*.js
|
@ -9,7 +9,7 @@ public record ThreadPost
|
|||||||
public string Author { get; set; }
|
public string Author { get; set; }
|
||||||
public string Uid { get; set; }
|
public string Uid { get; set; }
|
||||||
public string RawHtml { get; set; }
|
public string RawHtml { get; set; }
|
||||||
public ParsedContent.ParsedContent? ParsedContent { get; set; }
|
public RootNode? ParsedContent { get; set; }
|
||||||
public string? File { get; set; }
|
public string? File { get; set; }
|
||||||
public string? Filename { get; set; }
|
public string? Filename { get; set; }
|
||||||
public string? Title { get; set; }
|
public string? Title { get; set; }
|
||||||
@ -18,8 +18,10 @@ public record ThreadPost
|
|||||||
|
|
||||||
[JsonIgnore]
|
[JsonIgnore]
|
||||||
public bool IsChapterAnnounce { get; set; } = false;
|
public bool IsChapterAnnounce { get; set; } = false;
|
||||||
|
[JsonIgnore]
|
||||||
public ChapterMetadata? Chapter { get; set; }
|
public ChapterMetadata? Chapter { get; set; }
|
||||||
public List<int>? RepliesTo { get; set; }
|
[JsonIgnore]
|
||||||
|
public bool AuthorPost { get; set; } = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
public record Metadata
|
public record Metadata
|
||||||
|
@ -6,10 +6,28 @@ using System.Web;
|
|||||||
|
|
||||||
namespace QuestReader.Models.ParsedContent;
|
namespace QuestReader.Models.ParsedContent;
|
||||||
|
|
||||||
public class ParsedContent
|
[JsonConverter(typeof(ContentConverter))]
|
||||||
|
public abstract class ContentNode
|
||||||
|
{
|
||||||
|
public string Type { get => GetType().Name.Replace("Node", ""); }
|
||||||
|
}
|
||||||
|
|
||||||
|
public abstract class ContainerNode : ContentNode
|
||||||
{
|
{
|
||||||
public Version Version { get; set; }
|
|
||||||
public IList<ContentNode> Nodes { get; set; }
|
public IList<ContentNode> Nodes { get; set; }
|
||||||
|
|
||||||
|
public override string ToString() => $"{Type} [ {string.Join(",\n", Nodes)} ]";
|
||||||
|
|
||||||
|
public IEnumerable<int> GetReferences()
|
||||||
|
{
|
||||||
|
return Nodes.SelectMany(n =>
|
||||||
|
n is ContainerNode container
|
||||||
|
? container.GetReferences()
|
||||||
|
: (
|
||||||
|
n is ReferenceNode @ref ? new List<int> { @ref.PostId ?? @ref.ThreadId } : Array.Empty<int>()
|
||||||
|
)
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
class ContentConverter : JsonConverter<ContentNode>
|
class ContentConverter : JsonConverter<ContentNode>
|
||||||
@ -28,54 +46,42 @@ class ContentConverter : JsonConverter<ContentNode>
|
|||||||
ContentNode value,
|
ContentNode value,
|
||||||
JsonSerializerOptions options)
|
JsonSerializerOptions options)
|
||||||
{
|
{
|
||||||
switch (value)
|
switch (value) {
|
||||||
{
|
|
||||||
case null:
|
case null:
|
||||||
JsonSerializer.Serialize(writer, null as ContentNode, options);
|
JsonSerializer.Serialize(writer, null as ContentNode, options);
|
||||||
break;
|
break;
|
||||||
|
case TextNode textNode:
|
||||||
|
JsonSerializer.Serialize(writer, textNode.Text, options);
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
{
|
var type = value.GetType();
|
||||||
if (value is RootNode)
|
JsonSerializer.Serialize(writer, value, type, options);
|
||||||
throw new InvalidDataContractException("RootNode must not be used");
|
break;
|
||||||
var type = value.GetType();
|
};
|
||||||
|
|
||||||
JsonSerializer.Serialize(writer, value, type, options);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
[JsonConverter(typeof(ContentConverter))]
|
public class RootNode : ContainerNode
|
||||||
public abstract class ContentNode
|
|
||||||
{
|
{
|
||||||
public string Type { get => GetType().Name.Replace("Node", ""); }
|
public Version Version { get; set; }
|
||||||
|
|
||||||
public virtual string Render(TemplateModel model)
|
|
||||||
{
|
|
||||||
throw new NotImplementedException("Rendering is not supported for this node type");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public class TextNode : ContentNode
|
public class TextNode : ContentNode
|
||||||
{
|
{
|
||||||
public string Text { get; set; }
|
public string Text { get; set; }
|
||||||
|
|
||||||
public override string ToString() => $"\"{Text}\"";
|
public override string ToString() => $"{Text}";
|
||||||
|
|
||||||
public override string Render(TemplateModel model) => HttpUtility.HtmlEncode(Text);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public class NewlineNode : ContentNode
|
public class NewlineNode : ContentNode
|
||||||
{
|
{
|
||||||
public override string ToString() => $"<br>";
|
public override string ToString() => $"\n";
|
||||||
|
|
||||||
public override string Render(TemplateModel model) => "<br>";
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public class ReferenceNode : ContentNode
|
public class ReferenceNode : ContentNode
|
||||||
{
|
{
|
||||||
public int PostId { get; set; }
|
public int? PostId { get; set; }
|
||||||
public int ThreadId { get; set; }
|
public int ThreadId { get; set; }
|
||||||
public ReferenceType ReferenceType { get; set; }
|
public ReferenceType ReferenceType { get; set; }
|
||||||
public bool LongReference { get; set; }
|
public bool LongReference { get; set; }
|
||||||
@ -88,20 +94,11 @@ public enum ReferenceType
|
|||||||
QuestDiscussion
|
QuestDiscussion
|
||||||
}
|
}
|
||||||
|
|
||||||
public abstract class ContainerNode : ContentNode
|
public class YoutubeEmbedNode : ContentNode
|
||||||
{
|
{
|
||||||
public IList<ContentNode> Nodes { get; set; }
|
/// <remarks>Todo: Make this a URL</remarks>
|
||||||
|
public string VideoLink { get; set; }
|
||||||
public override string ToString() => $"{Type} [ {string.Join(",\n", Nodes)} ]";
|
};
|
||||||
}
|
|
||||||
|
|
||||||
// A temporary container to recursively parse everything of a note before bailing and MUST NOT BE USED NORMALLY
|
|
||||||
public class RootNode : ContainerNode
|
|
||||||
{
|
|
||||||
public override string ToString() => throw new InvalidDataContractException("RootNode must not be used");
|
|
||||||
|
|
||||||
public override string Render(TemplateModel model) => throw new InvalidDataContractException("RootNode must not be used");
|
|
||||||
}
|
|
||||||
|
|
||||||
public class QuoteNode : ContainerNode { };
|
public class QuoteNode : ContainerNode { };
|
||||||
|
|
||||||
@ -117,7 +114,15 @@ public class InlineCodeNode : ContainerNode { };
|
|||||||
|
|
||||||
public class UnderlineNode : ContainerNode { };
|
public class UnderlineNode : ContainerNode { };
|
||||||
|
|
||||||
|
public class SmallFontNode : ContainerNode { };
|
||||||
|
|
||||||
|
public class ColorNode : ContainerNode
|
||||||
|
{
|
||||||
|
public string Color { get; set; }
|
||||||
|
};
|
||||||
|
|
||||||
public class ExternalLinkNode : ContainerNode
|
public class ExternalLinkNode : ContainerNode
|
||||||
{
|
{
|
||||||
|
/// <remarks>Todo: Make this a URL</remarks>
|
||||||
public string Destination { get; set; }
|
public string Destination { get; set; }
|
||||||
}
|
}
|
@ -6,6 +6,6 @@ public class TemplateModel
|
|||||||
public DateTime Now { get; set; }
|
public DateTime Now { get; set; }
|
||||||
public List<ThreadPost> Posts { get; set; }
|
public List<ThreadPost> Posts { get; set; }
|
||||||
public List<ThreadPost> AllPosts { get; set; }
|
public List<ThreadPost> AllPosts { get; set; }
|
||||||
public string BaseUrl { get; set; }
|
public string AssetsPath { get; set; }
|
||||||
public string ToolVersion { get; set; }
|
public string ToolVersion { get; set; }
|
||||||
}
|
}
|
@ -11,31 +11,23 @@ public class Generator
|
|||||||
|
|
||||||
public PostsSource PostsSource { get; set; }
|
public PostsSource PostsSource { get; set; }
|
||||||
|
|
||||||
public string QuestPath { get; set; }
|
public string AssetsPath { get; set; }
|
||||||
|
|
||||||
|
public string OutputPath { get; set; }
|
||||||
|
|
||||||
public Generator(string questName)
|
public Generator(string questName)
|
||||||
{
|
{
|
||||||
QuestPath = $"quests/{questName}";
|
|
||||||
|
|
||||||
QuestName = questName;
|
QuestName = questName;
|
||||||
PostsSource = new PostsSource(questName, QuestPath);
|
AssetsPath = $"/static/{questName}";
|
||||||
|
PostsSource = new PostsSource(questName);
|
||||||
var chapterAnnounces = PostsSource.Metadata.Chapters.Select(c => c.Announce ?? c.Start);
|
|
||||||
|
|
||||||
PostsSource.Accepted.Where(p => chapterAnnounces.Contains(p.Id)).ToList().ForEach(p =>
|
|
||||||
{
|
|
||||||
p.IsChapterAnnounce = true;
|
|
||||||
p.Chapter = PostsSource.Metadata.Chapters.Single(c => (c.Announce ?? c.Start) == p.Id);
|
|
||||||
});
|
|
||||||
|
|
||||||
var razorEngine = new RazorStandalone<StandaloneTemplate<TemplateModel>>("QuestReader");
|
var razorEngine = new RazorStandalone<StandaloneTemplate<TemplateModel>>("QuestReader");
|
||||||
var templateFile = "page_template.cshtml";
|
var templateFile = "page_template.cshtml";
|
||||||
var baseUrl = "";
|
|
||||||
RazorTemplate = razorEngine.Compile(
|
RazorTemplate = razorEngine.Compile(
|
||||||
"page_template.cshtml"
|
"page_template.cshtml"
|
||||||
) ?? throw new Exception("No template");
|
) ?? throw new Exception("No template");
|
||||||
|
|
||||||
Console.WriteLine($"Using \"{templateFile}\" with base URL {baseUrl}");
|
Console.WriteLine($"Using \"{templateFile}\" with base URL {AssetsPath}");
|
||||||
}
|
}
|
||||||
|
|
||||||
public string Run()
|
public string Run()
|
||||||
@ -43,17 +35,17 @@ public class Generator
|
|||||||
RazorTemplate.Model = new TemplateModel
|
RazorTemplate.Model = new TemplateModel
|
||||||
{
|
{
|
||||||
Metadata = PostsSource.Metadata,
|
Metadata = PostsSource.Metadata,
|
||||||
Posts = PostsSource.Accepted,
|
Posts = PostsSource.Accepted.ToList(),
|
||||||
AllPosts = PostsSource.Posts,
|
AllPosts = PostsSource.Posts,
|
||||||
Now = @DateTime.UtcNow,
|
Now = @DateTime.UtcNow,
|
||||||
BaseUrl = $"/static/{QuestName}",
|
AssetsPath = AssetsPath.TrimEnd('/'), // Strip trailing slash
|
||||||
ToolVersion = Assembly.GetEntryAssembly()?.GetCustomAttribute<AssemblyInformationalVersionAttribute>()?.InformationalVersion ?? "unknown"
|
ToolVersion = Assembly.GetEntryAssembly()?.GetCustomAttribute<AssemblyInformationalVersionAttribute>()?.InformationalVersion ?? "unknown"
|
||||||
};
|
};
|
||||||
|
|
||||||
var outputStream = new MemoryStream();
|
var outputStream = new MemoryStream();
|
||||||
RazorTemplate.ExecuteAsync(outputStream).Wait();
|
RazorTemplate.ExecuteAsync(outputStream).Wait();
|
||||||
|
|
||||||
var outputPath = Path.Join(QuestPath, "output.html");
|
var outputPath = Path.Join(OutputPath ?? PostsSource.BasePath, "output.html");
|
||||||
Console.WriteLine($"Template output {outputStream.Length} bytes");
|
Console.WriteLine($"Template output {outputStream.Length} bytes");
|
||||||
File.WriteAllBytes(outputPath, outputStream.ToArray());
|
File.WriteAllBytes(outputPath, outputStream.ToArray());
|
||||||
Console.WriteLine($"Wrote output to {outputPath}");
|
Console.WriteLine($"Wrote output to {outputPath}");
|
||||||
|
@ -9,12 +9,16 @@ public class PostsSource
|
|||||||
{
|
{
|
||||||
public List<ThreadPost> Posts { get; set; }
|
public List<ThreadPost> Posts { get; set; }
|
||||||
|
|
||||||
public List<ThreadPost> Accepted { get; set; }
|
public HashSet<ThreadPost> Accepted { get; set; }
|
||||||
|
|
||||||
public Metadata Metadata { get; set; }
|
public Metadata Metadata { get; set; }
|
||||||
|
|
||||||
public PostsSource(string questName, string basePath)
|
public string BasePath { get; set; }
|
||||||
|
|
||||||
|
public PostsSource(string questName)
|
||||||
{
|
{
|
||||||
|
BasePath = $"quests/{questName}";
|
||||||
|
|
||||||
var options = new JsonSerializerOptions
|
var options = new JsonSerializerOptions
|
||||||
{
|
{
|
||||||
PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
|
PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
|
||||||
@ -22,45 +26,41 @@ public class PostsSource
|
|||||||
WriteIndented = true
|
WriteIndented = true
|
||||||
};
|
};
|
||||||
|
|
||||||
using var fileStream = File.OpenRead(Path.Combine(basePath, "metadata.json"));
|
using var fileStream = File.OpenRead(Path.Combine(BasePath, "metadata.json"));
|
||||||
Metadata = JsonSerializer.Deserialize<Metadata>(fileStream, options)
|
Metadata = JsonSerializer.Deserialize<Metadata>(fileStream, options)
|
||||||
?? throw new InvalidDataException("Empty deserialisation result for quest metadata");
|
?? throw new InvalidDataException("Empty deserialisation result for quest metadata");
|
||||||
fileStream.Dispose();
|
fileStream.Dispose();
|
||||||
|
|
||||||
Console.Out.WriteLine($"Loaded metadata: {Metadata}");
|
Console.Out.WriteLine($"Loaded metadata: {Metadata}");
|
||||||
Posts = Metadata.Threads.SelectMany(tId =>
|
Posts = Metadata.Threads
|
||||||
{
|
.SelectMany(tId => QuestdenParse.GetThread(tId, BasePath).Result)
|
||||||
using var fileStream = File.OpenRead(Path.Combine(basePath, $"thread_{tId}.json"));
|
.ToList();
|
||||||
var threadData = JsonSerializer.Deserialize<List<ThreadPost>>(fileStream, options)
|
|
||||||
?? throw new InvalidDataException("Empty deserialisation result for thread data");
|
|
||||||
fileStream.Dispose();
|
|
||||||
|
|
||||||
return threadData;
|
using var postsListStream = File.OpenRead(Path.Combine(BasePath, "accepted.json"));
|
||||||
}).ToList();
|
|
||||||
|
|
||||||
using var postsListStream = File.OpenRead(Path.Combine(basePath, "accepted.json"));
|
|
||||||
var ids = JsonSerializer.Deserialize<List<int>>(postsListStream, options)
|
var ids = JsonSerializer.Deserialize<List<int>>(postsListStream, options)
|
||||||
?? throw new InvalidDataException("Empty deserialisation result for quest metadata");
|
?? throw new InvalidDataException("Empty deserialisation result for quest metadata");
|
||||||
Accepted = Posts.Where(p => ids.Contains(p.Id)).ToList();
|
Accepted = Posts.Where(p => ids.Contains(p.Id)).ToHashSet();
|
||||||
|
|
||||||
|
foreach (var chapter in Metadata.Chapters)
|
||||||
|
{
|
||||||
|
var post = Accepted.Single(p => p.Id == (chapter.Announce ?? chapter.Start));
|
||||||
|
post.IsChapterAnnounce = true;
|
||||||
|
post.Chapter = chapter;
|
||||||
|
}
|
||||||
|
|
||||||
Console.Out.WriteLine($"Loaded a list of {Accepted.Count} posts, referencing {Accepted.Where(a => a.File is not null).Count()} files");
|
Console.Out.WriteLine($"Loaded a list of {Accepted.Count} posts, referencing {Accepted.Where(a => a.File is not null).Count()} files");
|
||||||
|
|
||||||
var rx = new Regex(@"data-post-ref=""(\d+)""",
|
foreach (var post in Accepted)
|
||||||
RegexOptions.Compiled | RegexOptions.IgnoreCase);
|
|
||||||
foreach (var post in Posts)
|
|
||||||
{
|
{
|
||||||
var matches = rx.Matches(post.RawHtml);
|
post.AuthorPost = true;
|
||||||
if (!matches.Any())
|
if (post.ParsedContent is null || post.ParsedContent.Version < QuestdenParse.LatestCompatibleVersion)
|
||||||
continue;
|
throw new NotImplementedException("Repairing missing post content or updating it is not implemented yet");
|
||||||
|
|
||||||
post.RepliesTo = new List<int>();
|
|
||||||
foreach (Match match in matches)
|
|
||||||
{
|
|
||||||
var replyId = int.Parse(match.Groups[1].Value);
|
|
||||||
var found = Posts.FirstOrDefault(p => p.Id == replyId);
|
|
||||||
if (found is null)
|
|
||||||
continue;
|
|
||||||
post.RepliesTo.Add(replyId);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var referenced = Accepted.SelectMany(p => p.ParsedContent!.GetReferences());
|
||||||
|
Accepted.UnionWith(Posts.Where(p => referenced.Contains(p.Id)));
|
||||||
|
Accepted = Accepted.OrderBy(p => p.Id).ToHashSet();
|
||||||
|
|
||||||
|
Console.Out.WriteLine($"Done loading with {Accepted.Count} posts, referencing {Accepted.Where(a => a.File is not null).Count()} files");
|
||||||
}
|
}
|
||||||
}
|
}
|
@ -11,17 +11,17 @@ namespace QuestReader.Services;
|
|||||||
|
|
||||||
public class QuestdenParse
|
public class QuestdenParse
|
||||||
{
|
{
|
||||||
static readonly Version LatestCompatibleVersion = new(1, 0, 2);
|
public static readonly Version LatestCompatibleVersion = new(1, 0, 2);
|
||||||
|
|
||||||
static Regex RefRegex { get; } = new Regex(@"^ref\|(questarch|questdis|quest)\|(\d+)\|(\d+)$", RegexOptions.Compiled);
|
static Regex RefRegex { get; } = new Regex(@"^ref\|(questarch|questdis|quest)\|(\d+)\|(\d+)$", RegexOptions.Compiled);
|
||||||
|
|
||||||
static Regex LongRefRegex { get; } = new Regex(@"(?:https?://)?(www.)?(tgchan|questden).org/kusaba/(questarch|questdis|quest)/res/(\d+).html#?i?(\d+)?$", RegexOptions.Compiled);
|
static Regex LongRefRegex { get; } = new Regex(@"(?:https?://)?(?:www.)?(?:tgchan|questden).org/kusaba/(questarch|questdis|quest)/res/(\d+).html#?i?(\d+)?$", RegexOptions.Compiled);
|
||||||
|
|
||||||
static Regex DateRegex { get; } = new Regex(@"(\d{4,4})\/(\d\d)\/(\d\d)\(\w+\)(\d\d):(\d\d)", RegexOptions.Compiled);
|
static Regex DateRegex { get; } = new Regex(@"(\d{4,4})\/(\d\d)\/(\d\d)\(\w+\)(\d\d):(\d\d)", RegexOptions.Compiled);
|
||||||
|
|
||||||
static Regex FilenameRegex { get; } = new Regex(@"File \d+\.[^ ]+ - \([\d\.KMG]+B , \d+x\d+ , (.*) \)", RegexOptions.Compiled);
|
static Regex FilenameRegex { get; } = new Regex(@"File \d+\.[^ ]+ - \([\d\.KMG]+B , \d+x\d+ , (.*) \)", RegexOptions.Compiled);
|
||||||
|
|
||||||
public static async Task GetThread(int threadId)
|
public static async Task<IEnumerable<ThreadPost>> GetThread(int threadId, string destinationPath)
|
||||||
{
|
{
|
||||||
var url = $"http://questden.org/kusaba/quest/res/{threadId}.html";
|
var url = $"http://questden.org/kusaba/quest/res/{threadId}.html";
|
||||||
var options = new JsonSerializerOptions
|
var options = new JsonSerializerOptions
|
||||||
@ -31,26 +31,29 @@ public class QuestdenParse
|
|||||||
WriteIndented = true
|
WriteIndented = true
|
||||||
};
|
};
|
||||||
|
|
||||||
var doc = new HtmlDocument();
|
var doc = new HtmlDocument
|
||||||
doc.OptionEmptyCollection = true;
|
{
|
||||||
|
OptionEmptyCollection = true
|
||||||
|
};
|
||||||
|
|
||||||
if (File.Exists($"thread_{threadId}.json"))
|
// Todo: check if the thread data & parsed entity is of same version
|
||||||
return;
|
if (File.Exists(Path.Join(destinationPath, $"thread_{threadId}.json")))
|
||||||
|
return JsonSerializer.Deserialize<IEnumerable<ThreadPost>>(File.ReadAllText("asd"), options)
|
||||||
|
?? throw new NullReferenceException("No data loaded");
|
||||||
|
|
||||||
var cacheFile = $"cache/QuestDen-{threadId}.html";
|
var cacheDir = Path.Join(destinationPath, "cache");
|
||||||
|
var cacheFile = Path.Join(cacheDir, $"QuestDen-{threadId}.html");
|
||||||
if (!File.Exists(cacheFile))
|
if (!File.Exists(cacheFile))
|
||||||
{
|
{
|
||||||
var httpClient = new HttpClient();
|
var httpClient = new HttpClient();
|
||||||
var content = await httpClient.GetStringAsync(url);
|
var content = await httpClient.GetStringAsync(url);
|
||||||
if (!Directory.Exists("cache"))
|
if (!Directory.Exists(cacheDir))
|
||||||
Directory.CreateDirectory("cache");
|
Directory.CreateDirectory(cacheDir);
|
||||||
File.WriteAllText(cacheFile, content);
|
File.WriteAllText(cacheFile, content);
|
||||||
doc.LoadHtml(content);
|
doc.LoadHtml(content);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
|
||||||
doc.LoadHtml(File.ReadAllText(cacheFile));
|
doc.LoadHtml(File.ReadAllText(cacheFile));
|
||||||
}
|
|
||||||
|
|
||||||
var nodes = doc.DocumentNode.SelectNodes(".//*[@class='reply']|.//form[@id='delform']");
|
var nodes = doc.DocumentNode.SelectNodes(".//*[@class='reply']|.//form[@id='delform']");
|
||||||
|
|
||||||
@ -59,11 +62,11 @@ public class QuestdenParse
|
|||||||
{
|
{
|
||||||
var post = ParsePost(node, threadId);
|
var post = ParsePost(node, threadId);
|
||||||
posts.Add(post);
|
posts.Add(post);
|
||||||
//var postJson = JsonSerializer.Serialize(post);
|
|
||||||
//Console.Out.WriteLine($"{postJson}\n");
|
|
||||||
}
|
}
|
||||||
File.WriteAllText($"thread_{threadId}.json", JsonSerializer.Serialize(posts, options));
|
|
||||||
|
return posts;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static ThreadPost ParsePost(string postHtml, int threadId)
|
public static ThreadPost ParsePost(string postHtml, int threadId)
|
||||||
{
|
{
|
||||||
var htmlDoc = new HtmlDocument();
|
var htmlDoc = new HtmlDocument();
|
||||||
@ -106,7 +109,7 @@ public class QuestdenParse
|
|||||||
post.File = postNode
|
post.File = postNode
|
||||||
.SelectNodes("./div[@class='postwidth']//*[@class='filesize']/a")
|
.SelectNodes("./div[@class='postwidth']//*[@class='filesize']/a")
|
||||||
.SingleOrDefault()
|
.SingleOrDefault()
|
||||||
?.Attributes["href"].Value.Trim();
|
?.Attributes["href"].DeEntitizeValue.Replace("/kusaba/questarch/src/", "").Trim();
|
||||||
|
|
||||||
var filenameRaw = postNode
|
var filenameRaw = postNode
|
||||||
.SelectNodes("./div[@class='postwidth']//*[@class='filesize']")
|
.SelectNodes("./div[@class='postwidth']//*[@class='filesize']")
|
||||||
@ -148,18 +151,14 @@ public class QuestdenParse
|
|||||||
return post;
|
return post;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static ParsedContent ParseContent(string postHtml)
|
public static RootNode ParseContent(string postHtml)
|
||||||
{
|
{
|
||||||
var htmlDoc = new HtmlDocument();
|
var htmlDoc = new HtmlDocument();
|
||||||
htmlDoc.LoadHtml(postHtml);
|
htmlDoc.LoadHtml(postHtml);
|
||||||
var rootNode = RecursiveParse(htmlDoc.DocumentNode);
|
var parseResult = RecursiveParse(htmlDoc.DocumentNode);
|
||||||
if (rootNode is not RootNode)
|
if (parseResult is not RootNode rootNode)
|
||||||
throw new Exception("Parsing returned a non-RootNode root");
|
throw new Exception("Parsing returned a non-RootNode root");
|
||||||
return new ParsedContent
|
return rootNode;
|
||||||
{
|
|
||||||
Version = LatestCompatibleVersion,
|
|
||||||
Nodes = ((RootNode)rootNode).Nodes
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static ContentNode RecursiveParse(HtmlNode node, ContentNode? parentNode = null)
|
private static ContentNode RecursiveParse(HtmlNode node, ContentNode? parentNode = null)
|
||||||
@ -167,8 +166,12 @@ public class QuestdenParse
|
|||||||
if (node is null)
|
if (node is null)
|
||||||
throw new NullReferenceException("Html node is null");
|
throw new NullReferenceException("Html node is null");
|
||||||
|
|
||||||
if (node is HtmlTextNode textNode)
|
if (node is HtmlTextNode textNode) {
|
||||||
return new TextNode { Text = HttpUtility.HtmlDecode(textNode.Text.Trim()) };
|
var decoded = HttpUtility.HtmlDecode(textNode.Text.Trim());
|
||||||
|
if (parentNode is QuoteNode)
|
||||||
|
decoded = Regex.Replace(decoded, @"^>\s*", "");
|
||||||
|
return new TextNode { Text = decoded };
|
||||||
|
}
|
||||||
|
|
||||||
if (node.NodeType is HtmlNodeType.Document or HtmlNodeType.Element)
|
if (node.NodeType is HtmlNodeType.Document or HtmlNodeType.Element)
|
||||||
{
|
{
|
||||||
@ -206,13 +209,20 @@ public class QuestdenParse
|
|||||||
&& match.Success
|
&& match.Success
|
||||||
=> new ReferenceNode
|
=> new ReferenceNode
|
||||||
{
|
{
|
||||||
PostId = int.Parse((match.Groups[5]?.Success ?? false) ? match.Groups[5].Value : match.Groups[4].Value),
|
PostId = int.Parse((match.Groups[3]?.Success ?? false) ? match.Groups[3].Value : match.Groups[2].Value),
|
||||||
ThreadId = int.Parse(match.Groups[4].Value),
|
ThreadId = int.Parse(match.Groups[2].Value),
|
||||||
|
ReferenceType = match.Groups[1].Value switch
|
||||||
|
{
|
||||||
|
"quest" => ReferenceType.QuestActive,
|
||||||
|
"questarch" => ReferenceType.QuestArchive,
|
||||||
|
"questdis" => ReferenceType.QuestDiscussion,
|
||||||
|
_ => throw new InvalidDataException(""),
|
||||||
|
},
|
||||||
LongReference = true
|
LongReference = true
|
||||||
},
|
},
|
||||||
"a" when !node.GetClasses().Any() => new ExternalLinkNode { Destination = node.GetAttributeValue("href", "ERROR") },
|
"a" when !node.GetClasses().Any() => new ExternalLinkNode { Destination = node.GetAttributeValue("href", "ERROR") },
|
||||||
"br" => new NewlineNode { },
|
"br" => new NewlineNode { },
|
||||||
"#document" => new RootNode { },
|
"#document" => new RootNode { Version = LatestCompatibleVersion },
|
||||||
"i" => new ItalicsNode { },
|
"i" => new ItalicsNode { },
|
||||||
"b" => new BoldNode { },
|
"b" => new BoldNode { },
|
||||||
"strike" => new StrikeoutNode { },
|
"strike" => new StrikeoutNode { },
|
||||||
@ -231,6 +241,24 @@ public class QuestdenParse
|
|||||||
&& maybeStyle.Name == "style"
|
&& maybeStyle.Name == "style"
|
||||||
&& maybeStyle.DeEntitizeValue == @"border-bottom: 1px solid"
|
&& maybeStyle.DeEntitizeValue == @"border-bottom: 1px solid"
|
||||||
=> new UnderlineNode { },
|
=> new UnderlineNode { },
|
||||||
|
"span" when
|
||||||
|
node.GetAttributes() is var attributes
|
||||||
|
&& attributes.Count() == 1
|
||||||
|
&& attributes.Single() is var maybeStyle
|
||||||
|
&& maybeStyle.Name == "style"
|
||||||
|
&& maybeStyle.DeEntitizeValue == @"font-size:small;"
|
||||||
|
=> new SmallFontNode { },
|
||||||
|
"span" when
|
||||||
|
node.GetAttributes() is var attributes
|
||||||
|
&& attributes.Count() == 1
|
||||||
|
&& attributes.Single() is var maybeStyle
|
||||||
|
&& maybeStyle.Name == "style"
|
||||||
|
// Let's hope nobody used any colors beyond the hex ones...
|
||||||
|
// But probably will need to add support for that. Eh, later!
|
||||||
|
&& Regex.Match(maybeStyle.DeEntitizeValue, @"^color:\s*(#[0-9a-f]{3,8});?$", RegexOptions.IgnoreCase) is var match
|
||||||
|
&& match is not null
|
||||||
|
&& match.Success
|
||||||
|
=> new ColorNode { Color = match.Groups[1].Value },
|
||||||
"span" when
|
"span" when
|
||||||
node.Descendants().Where(
|
node.Descendants().Where(
|
||||||
d => d is not HtmlTextNode
|
d => d is not HtmlTextNode
|
||||||
@ -241,8 +269,9 @@ public class QuestdenParse
|
|||||||
&& descendants.Single() is HtmlNode innerNode
|
&& descendants.Single() is HtmlNode innerNode
|
||||||
&& innerNode.Name == "iframe"
|
&& innerNode.Name == "iframe"
|
||||||
&& innerNode.GetAttributeValue("src", null).Contains("youtube")
|
&& innerNode.GetAttributeValue("src", null).Contains("youtube")
|
||||||
=> new TextNode { Text = $"Here be youtube link {innerNode.GetAttributeValue("src", null)}"},
|
=> new YoutubeEmbedNode { VideoLink = innerNode.GetAttributes().Single(a => a.Name == "src").DeEntitizeValue },
|
||||||
"div" when
|
// I have seen both being used but I am not sure as to the difference. Different software version?
|
||||||
|
"div" or "span" when
|
||||||
node.GetAttributes() is var attributes
|
node.GetAttributes() is var attributes
|
||||||
&& attributes.Count() == 1
|
&& attributes.Count() == 1
|
||||||
&& attributes.Single() is var maybeStyle
|
&& attributes.Single() is var maybeStyle
|
||||||
@ -251,9 +280,6 @@ public class QuestdenParse
|
|||||||
=> new InlineCodeNode { },
|
=> new InlineCodeNode { },
|
||||||
_ => throw new InvalidDataException($"Unknown node parse attempt: {node.Name} #{node.Id} .{string.Join(".", node.GetClasses())}\n{node.OuterHtml}")
|
_ => throw new InvalidDataException($"Unknown node parse attempt: {node.Name} #{node.Id} .{string.Join(".", node.GetClasses())}\n{node.OuterHtml}")
|
||||||
};
|
};
|
||||||
//if (outNode is ExternalLinkNode refNode)
|
|
||||||
//Console.Out.WriteLine($"Refnode: {string.Join(", ", node.GetClasses())} {node.OuterHtml}");
|
|
||||||
//Console.Out.WriteLine($"{node.Name}: {outNode.GetType().Name} {outNode is ContainerNode} {node.ChildNodes.Count} children, {node.Descendants().Count()} descendants");
|
|
||||||
if (outNode is ContainerNode container)
|
if (outNode is ContainerNode container)
|
||||||
{
|
{
|
||||||
container.Nodes = node.ChildNodes
|
container.Nodes = node.ChildNodes
|
||||||
|
@ -3,6 +3,7 @@
|
|||||||
@using System.Linq
|
@using System.Linq
|
||||||
@using QuestReader.Models
|
@using QuestReader.Models
|
||||||
@using QuestReader.Services
|
@using QuestReader.Services
|
||||||
|
@using QuestReader.Extensions
|
||||||
@inherits StandaloneTemplate<TemplateModel>
|
@inherits StandaloneTemplate<TemplateModel>
|
||||||
|
|
||||||
<!DOCTYPE html>
|
<!DOCTYPE html>
|
||||||
@ -17,7 +18,7 @@
|
|||||||
var autoDescription = $"Quest single-page archive. Generated {Model.Now} (UTC), {Model.Posts.Count} posts, {Model.Metadata.Chapters.Count} chapters";
|
var autoDescription = $"Quest single-page archive. Generated {Model.Now} (UTC), {Model.Posts.Count} posts, {Model.Metadata.Chapters.Count} chapters";
|
||||||
// A hack, tbh, should be something better instead..
|
// A hack, tbh, should be something better instead..
|
||||||
var description = Model.Metadata.Description ?? autoDescription;
|
var description = Model.Metadata.Description ?? autoDescription;
|
||||||
var preview = $"https://media.lunar.exchange{Model.BaseUrl}/{Model.Metadata.SocialPreview}";
|
var preview = $"https://media.lunar.exchange{Model.AssetsPath}/{Model.Metadata.SocialPreview}";
|
||||||
}
|
}
|
||||||
<title>@title</title>
|
<title>@title</title>
|
||||||
<link rel="stylesheet" href="main.css">
|
<link rel="stylesheet" href="main.css">
|
||||||
@ -57,40 +58,35 @@
|
|||||||
</header>
|
</header>
|
||||||
<main>
|
<main>
|
||||||
@{
|
@{
|
||||||
Func<(ThreadPost, bool), object> makePost =
|
Func<ThreadPost, object> makePost =
|
||||||
@<article id="post-@item.Item1.Id" class="post@(item.Item1 is not null ? " image-post" : "")@(item.Item2 ? "" : " suggestion-post")">
|
@<article id="post-@item.Id" class="post@(item.File is not null ? " image-post" : "")@(item.AuthorPost ? "" : " suggestion-post")" data-postid="@item.Id">
|
||||||
@if (item.Item1.Title is not null) {
|
@if (item.Title is not null) {
|
||||||
<h2 class="post-self-title">@item.Item1.Title</h2>
|
<h2 class="post-self-title">@item.Title</h2>
|
||||||
}
|
}
|
||||||
<h3 class="post-header"><a class="post-anchor" href="#post-@item.Item1.Id"><span class="post-anchor-mark">#</span>@item.Item1.Id</a> <span class="author">@item.Item1.Author</span> <time>@item.Item1.Date</time></h3>
|
<h3 class="post-header"><a class="post-anchor" href="#post-@item.Id"><span class="post-anchor-mark">#</span>@item.Id</a> <span class="author">@item.Author</span> <time>@item.Date</time></h3>
|
||||||
<div class="post-content">
|
<div class="post-content">
|
||||||
@if (item.Item1.File is not null) {
|
@if (item.File is not null) {
|
||||||
<figure class="post-image">
|
<figure class="post-image">
|
||||||
<img src="@Model.BaseUrl/@item.Item1.File" alt="@item.Item1.Filename">
|
<img src="@Model.AssetsPath/@item.File" alt="@item.Filename">
|
||||||
</figure>
|
</figure>
|
||||||
}
|
}
|
||||||
@if (item.Item1.RawHtml.Trim().Length > 0) {
|
@if (item.RawHtml.Trim().Length > 0) {
|
||||||
<div class="post-text">@Raw(item.Item1.RawHtml)</div>
|
<div class="post-text">@Raw(item.ParsedContent!.RenderContentHtml(Model))</div>
|
||||||
}
|
}
|
||||||
</div>
|
</div>
|
||||||
</article>;
|
</article>
|
||||||
|
;
|
||||||
}
|
}
|
||||||
@foreach (var item in Model.Posts)
|
@foreach (var item in Model.Posts)
|
||||||
{
|
{
|
||||||
@if (item.IsChapterAnnounce) {
|
@if (item.IsChapterAnnounce) {
|
||||||
<h2 id="chapter-@item.Chapter.Id" class="chapter-announce">
|
// This might nullref throw, but let's assume this bool is always set only when this is set too
|
||||||
|
<h2 id="chapter-@item.Chapter!.Id" class="chapter-announce">
|
||||||
<a class="chapter-anchor" href="#chapter-@item.Chapter.Id">#</a> <span class="chapter-name">@item.Chapter.Name</span> - <span class="chapter-subtitle">@item.Chapter.Subtitle</span>
|
<a class="chapter-anchor" href="#chapter-@item.Chapter.Id">#</a> <span class="chapter-name">@item.Chapter.Name</span> - <span class="chapter-subtitle">@item.Chapter.Subtitle</span>
|
||||||
</h2>
|
</h2>
|
||||||
}
|
}
|
||||||
if (item.RepliesTo is not null && item.RepliesTo.Count > 0)
|
|
||||||
{
|
|
||||||
@foreach (var replyId in item.RepliesTo)
|
|
||||||
{
|
|
||||||
@makePost((Model.AllPosts.First(p => p.Id == replyId), false))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@makePost((item, true));
|
@makePost(item);
|
||||||
}
|
}
|
||||||
</main>
|
</main>
|
||||||
<footer>
|
<footer>
|
||||||
|
@ -152,9 +152,12 @@ a.post-anchor:hover .post-anchor-mark {
|
|||||||
|
|
||||||
/* In-chapter stuff */
|
/* In-chapter stuff */
|
||||||
|
|
||||||
.quoted-text {
|
.text-quote {
|
||||||
color: var(--fg-muted);
|
color: var(--fg-muted);
|
||||||
}
|
}
|
||||||
|
.text-quote::before {
|
||||||
|
content: "> "
|
||||||
|
}
|
||||||
|
|
||||||
.post-reference {
|
.post-reference {
|
||||||
color: var(--highlight-blue);
|
color: var(--highlight-blue);
|
||||||
|
Loading…
Reference in New Issue
Block a user