-
Notifications
You must be signed in to change notification settings - Fork 0
/
ArticleIndex.cs
74 lines (64 loc) · 2.38 KB
/
ArticleIndex.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
using System.Text;
using System.Text.Encodings.Web;
using System.Text.Json;
using System.Text.Unicode;
using System.Text.RegularExpressions;
namespace OMinimoScrapper;
/// <summary>
/// Reads an article index from an indented plain-text outline and
/// round-trips it to and from a JSON file.
/// </summary>
public static class ArticleIndex
{
    // Indented, numbered line such as "  3. Some sub-chapter"; group 1 is the name.
    private static readonly Regex s_subChapterPattern =
        new(@"^ +\d+\. (.*)$", RegexOptions.Compiled);

    // "Title [in: Other title]"; group 1 is the title, group 2 the bracketed title.
    private static readonly Regex s_alternativeTitlePattern =
        new(@"^(.+) \[in: (.+)\]$", RegexOptions.Compiled);

    // Shared by serialization and deserialization so both sides agree on naming.
    private static readonly JsonSerializerOptions s_options = new()
    {
        WriteIndented = true,
        PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
        // Emit non-ASCII characters as-is instead of \uXXXX escapes.
        Encoder = JavaScriptEncoder.Create(UnicodeRanges.All)
    };

    /// <summary>
    /// Parses an outline file into a flat list of articles.
    /// </summary>
    /// <remarks>
    /// Line classification:
    /// a line that does not start with a space is a chapter heading (and resets
    /// the current sub-chapter); an indented line of the form "N. text" is a
    /// sub-chapter heading; any other indented line is an article title, which
    /// may carry a trailing " [in: ...]" part that is stored in
    /// <c>Metadata.Title</c>. Blank and whitespace-only lines are ignored.
    /// </remarks>
    /// <param name="filename">Path of the text file to read.</param>
    /// <param name="encoding">Encoding of the file; UTF-8 when <c>null</c>.</param>
    /// <returns>The articles in file order, each tagged with its chapter and sub-chapter.</returns>
    public static async Task<List<Article>> FromTextFile(string filename, Encoding? encoding = null)
    {
        var articles = new List<Article>();
        string[] lines = await File.ReadAllLinesAsync(filename, encoding ?? Encoding.UTF8);

        string chapter = string.Empty;
        string subChapter = string.Empty;

        foreach (string line in lines)
        {
            // A blank line carries no heading or title; without this guard an empty
            // line would be taken for a chapter heading and wipe the current chapter.
            if (string.IsNullOrWhiteSpace(line))
            {
                continue;
            }

            if (!line.StartsWith(' '))
            {
                chapter = TextUtility.CapitalizeFirst(line.Trim().ToLowerInvariant());
                subChapter = string.Empty;
                continue;
            }

            Match subChapterMatch = s_subChapterPattern.Match(line);
            if (subChapterMatch.Success)
            {
                subChapter = subChapterMatch.Groups[1].Value.Trim();
                continue;
            }

            string title = line.Trim();
            var article = new Article
            {
                Chapter = chapter,
                SubChapter = subChapter,
                Title = title
            };

            // Only split the title when the whole "[in: ...]" shape is present;
            // otherwise the title is kept verbatim and no metadata is attached.
            Match alternative = s_alternativeTitlePattern.Match(title);
            if (alternative.Success)
            {
                article.Title = alternative.Groups[1].Value;
                article.Metadata = new ArticleMetadata
                {
                    Title = alternative.Groups[2].Value
                };
            }

            articles.Add(article);
        }

        return articles;
    }

    /// <summary>
    /// Writes the articles to <paramref name="resultFile"/> as indented JSON,
    /// creating the file or overwriting an existing one.
    /// </summary>
    /// <param name="articles">Articles to serialize.</param>
    /// <param name="resultFile">Destination path.</param>
    internal static async Task ToJsonFile(List<Article> articles, string resultFile)
    {
        await using FileStream fs = File.Open(resultFile, FileMode.Create);
        await JsonSerializer.SerializeAsync(fs, articles, s_options);
    }

    /// <summary>
    /// Reads a list of articles from a JSON file.
    /// </summary>
    /// <param name="jsonFile">Path of the JSON file to read.</param>
    /// <returns>The deserialized list, or <c>null</c> when the file contains the JSON literal <c>null</c>.</returns>
    internal static async Task<List<Article>?> FromJsonFile(string jsonFile)
    {
        // OpenRead asks for read access only, so read-only files can be loaded.
        await using FileStream fs = File.OpenRead(jsonFile);
        return await JsonSerializer.DeserializeAsync<List<Article>>(fs, s_options);
    }
}