518 lines
13 KiB
TypeScript
518 lines
13 KiB
TypeScript
/**
|
|
* Read markdown files and write their content and metadata to json files which can then be imported by React.
|
|
* - File and directory names starting with an underscore (_) get ignored.
|
|
* - Symbolic links are not supported.
|
|
* - The Filename-to-URL encoder is not perfect. Some non-URL-friendly filenames might cause problems.
|
|
* - series must start with a number followed by an underscore
|
|
*/
|
|
|
|
import fs from "fs" // read and write files
|
|
import path from "path" // get relative path
|
|
import elasticlunr from "elasticlunr" // search index generation
|
|
import matter from "gray-matter" // parse markdown metadata
|
|
import toc from "markdown-toc" // table of contents generation
|
|
|
|
const markdownPath = "./markdown" // where it will look for markdown documents
|
|
const outPath = "./src/data" // path to the json database
|
|
|
|
const contentDirectoryPath = `${outPath}/content`
|
|
const mapFilePath = `${outPath}/map.json`
|
|
|
|
interface Map {
|
|
// key: YYYY-MM-DD
|
|
// value: url
|
|
date: {
|
|
[key: string]: string[]
|
|
}
|
|
|
|
// key: tag name
|
|
// value: url
|
|
tags: {
|
|
[key: string]: string[]
|
|
}
|
|
|
|
// list of all meta data
|
|
meta: {
|
|
tags: string[]
|
|
}
|
|
|
|
// searchable, non-series posts
|
|
// must have a post date
|
|
// tag is not required
|
|
posts: {
|
|
[key: string]: {
|
|
title: string
|
|
date: string
|
|
tags: string[]
|
|
toc: string
|
|
preview: string
|
|
}
|
|
}
|
|
|
|
// series posts have "previous post" and "next post" button so they need to be ordered
|
|
series: {
|
|
[key: string]: {
|
|
title: string
|
|
toc: string // in series home page and ≡ (3 horizontal line) button
|
|
length: number
|
|
order: string[] // url order
|
|
tags: string[]
|
|
}
|
|
}
|
|
|
|
// urls of unsearchable posts
|
|
// it is here to quickly check if a post exists or not
|
|
unsearchable: {
|
|
[key: string]: {
|
|
title: string
|
|
}
|
|
}
|
|
}
|
|
|
|
interface SeriesMap {
|
|
// key: url
|
|
[key: string]: {
|
|
index: number
|
|
url: string
|
|
}[]
|
|
}
|
|
|
|
// searchable data that will be converted to JSON string
|
|
const map: Map = {
|
|
date: {},
|
|
tags: {},
|
|
meta: {
|
|
tags: [],
|
|
},
|
|
posts: {},
|
|
series: {},
|
|
unsearchable: {},
|
|
}
|
|
const seriesMap: SeriesMap = {}
|
|
const index = elasticlunr(function () {
|
|
this.addField("title" as never)
|
|
this.addField("body" as never)
|
|
this.setRef("url" as never)
|
|
})
|
|
|
|
// converts file path to url
|
|
function path2URL(pathToConvert: string): string {
|
|
return `/${path.relative(markdownPath, pathToConvert)}`
|
|
.replace(/\.[^/.]+$/, "") // remove the file extension
|
|
.replace(/ /g, "-") // replace all space with a dash
|
|
}
|
|
|
|
// gets the text after the last slash
|
|
function path2FileOrFolderName(inputPath: string): string {
|
|
// remove trailing slash
|
|
if (inputPath[-1] == "/")
|
|
inputPath = inputPath.slice(0, inputPath.length - 1)
|
|
|
|
// get the last section
|
|
return inputPath.slice(inputPath.lastIndexOf("/") + 1)
|
|
}
|
|
|
|
// A recursive function that calls itself for every files and directories that it finds
|
|
function recursiveParsePosts(fileOrFolderPath: string) {
|
|
// get string after the last slash character
|
|
const fileOrFolderName = path2FileOrFolderName(fileOrFolderPath)
|
|
|
|
// ignore if file or directory name starts with a underscore
|
|
if (fileOrFolderName.startsWith("_")) return
|
|
|
|
// get data about the given path
|
|
const stats = fs.lstatSync(fileOrFolderPath)
|
|
|
|
// if it's a directory, call this function to every files/directories in it
|
|
// if it's a file, parse it and then save it to file
|
|
if (stats.isDirectory()) {
|
|
fs.readdirSync(fileOrFolderPath).map((childPath) => {
|
|
recursiveParsePosts(`${fileOrFolderPath}/${childPath}`)
|
|
})
|
|
} else if (stats.isFile()) {
|
|
// skip if it is not a markdown file
|
|
if (!fileOrFolderName.endsWith(".md")) {
|
|
console.log(`Ignoring non markdown file at: ${fileOrFolderPath}`)
|
|
return
|
|
}
|
|
|
|
// path that will be used as site url
|
|
const urlPath = path2URL(fileOrFolderPath)
|
|
|
|
// parse markdown metadata
|
|
const parsedMarkdown = matter(fs.readFileSync(fileOrFolderPath, "utf8"))
|
|
|
|
if (!parsedMarkdown.data.title) {
|
|
throw Error(`Title is not defined in file: ${fileOrFolderPath}`)
|
|
}
|
|
|
|
if (!parsedMarkdown.data.date) {
|
|
throw Error(`Date is not defined in file: ${fileOrFolderPath}`)
|
|
}
|
|
|
|
// urlPath starts with a slash
|
|
const contentFilePath = `${contentDirectoryPath}${urlPath}.json`
|
|
|
|
// create directory to put json content files
|
|
fs.mkdirSync(
|
|
contentFilePath.slice(0, contentFilePath.lastIndexOf("/")),
|
|
{ recursive: true }
|
|
)
|
|
|
|
// write content to json file
|
|
fs.writeFileSync(
|
|
contentFilePath,
|
|
JSON.stringify({
|
|
content: parsedMarkdown.content.trim(),
|
|
})
|
|
)
|
|
|
|
// Parse data that will be written to map.js
|
|
const postData = {
|
|
title: parsedMarkdown.data.title,
|
|
preview: "",
|
|
date: "",
|
|
tags: [],
|
|
toc: toc(parsedMarkdown.content).content,
|
|
}
|
|
|
|
// content preview
|
|
// parsedMarkdown.excerpt is intentionally not used
|
|
// todo: fix potential improper closing of html tag
|
|
const slicedContent = parsedMarkdown.content.split(" ")
|
|
if (slicedContent.length > 19) {
|
|
postData.preview = slicedContent.slice(0, 19).join(" ") + " ..."
|
|
} else {
|
|
postData.preview = parsedMarkdown.content
|
|
}
|
|
|
|
// date
|
|
const postDate = new Date(parsedMarkdown.data.date)
|
|
postData.date = postDate.toLocaleString("default", {
|
|
month: "short",
|
|
day: "numeric",
|
|
year: "numeric",
|
|
})
|
|
|
|
const YYYY_MM_DD = postDate.toISOString().split("T")[0]
|
|
if (map.date[YYYY_MM_DD]) {
|
|
map.date[YYYY_MM_DD].push(urlPath)
|
|
} else {
|
|
map.date[YYYY_MM_DD] = [urlPath]
|
|
}
|
|
|
|
//tags
|
|
postData.tags = parsedMarkdown.data.tags
|
|
if (postData.tags) {
|
|
postData.tags.forEach((tag) => {
|
|
if (map.tags[tag]) {
|
|
map.tags[tag].push(urlPath)
|
|
} else {
|
|
map.tags[tag] = [urlPath]
|
|
}
|
|
})
|
|
}
|
|
|
|
map.posts[urlPath] = postData
|
|
index.addDoc({
|
|
title: parsedMarkdown.data.title,
|
|
body: parsedMarkdown.content,
|
|
url: urlPath,
|
|
})
|
|
}
|
|
}
|
|
|
|
function recursiveParseUnsearchable(fileOrFolderPath: string) {
|
|
// get string after the last slash character
|
|
const fileOrFolderName = path2FileOrFolderName(fileOrFolderPath)
|
|
|
|
// ignore if file or directory name starts with a underscore
|
|
if (fileOrFolderName.startsWith("_")) return
|
|
|
|
// illegal names
|
|
if (
|
|
fileOrFolderPath == "./markdown/unsearchable/posts" ||
|
|
fileOrFolderPath == "./markdown/unsearchable/series"
|
|
)
|
|
throw Error(
|
|
`Illegal name (posts/series) in path: "${fileOrFolderPath}".`
|
|
)
|
|
|
|
// get data about the given path
|
|
const stats = fs.lstatSync(fileOrFolderPath)
|
|
|
|
// if it's a directory, call this function to every files/directories in it
|
|
// if it's a file, parse it and then save it to file
|
|
if (stats.isDirectory()) {
|
|
fs.readdirSync(fileOrFolderPath).map((childPath) => {
|
|
recursiveParseUnsearchable(`${fileOrFolderPath}/${childPath}`)
|
|
})
|
|
} else if (stats.isFile()) {
|
|
// skip if it is not a markdown file
|
|
if (!fileOrFolderName.endsWith(".md")) {
|
|
console.log(`Ignoring non markdown file at: ${fileOrFolderPath}`)
|
|
return
|
|
}
|
|
|
|
const _urlPath = path2URL(fileOrFolderPath)
|
|
const urlPath = _urlPath.slice(
|
|
_urlPath
|
|
.slice(1) // ignore the first slash
|
|
.indexOf("/") + 1
|
|
)
|
|
|
|
// parse markdown metadata
|
|
const parsedMarkdown = matter(fs.readFileSync(fileOrFolderPath, "utf8"))
|
|
|
|
if (!parsedMarkdown.data.title) {
|
|
throw Error(`Title is not defined in file: ${fileOrFolderPath}`)
|
|
}
|
|
|
|
// urlPath starts with a slash
|
|
const contentFilePath = `${contentDirectoryPath}/unsearchable${urlPath}.json`
|
|
|
|
// create directory to put json content files
|
|
fs.mkdirSync(
|
|
contentFilePath.slice(0, contentFilePath.lastIndexOf("/")),
|
|
{ recursive: true }
|
|
)
|
|
|
|
// write content to json file
|
|
fs.writeFileSync(
|
|
contentFilePath,
|
|
JSON.stringify({
|
|
content: parsedMarkdown.content.trim(),
|
|
})
|
|
)
|
|
|
|
// Parse data that will be written to map.js
|
|
map.unsearchable[urlPath] = {
|
|
title: parsedMarkdown.data.title,
|
|
}
|
|
|
|
index.addDoc({
|
|
title: parsedMarkdown.data.title,
|
|
body: parsedMarkdown.content,
|
|
url: urlPath,
|
|
})
|
|
}
|
|
}
|
|
|
|
function recursiveParseSeries(fileOrFolderPath: string) {
|
|
// get string after the last slash character
|
|
const fileOrFolderName = path2FileOrFolderName(fileOrFolderPath)
|
|
|
|
// ignore if file or directory name starts with a underscore
|
|
if (fileOrFolderName.startsWith("_")) return
|
|
|
|
// get data about the given path
|
|
const stats = fs.lstatSync(fileOrFolderPath)
|
|
|
|
// if it's a directory, call this function to every files/directories in it
|
|
// if it's a file, parse it and then save it to file
|
|
if (stats.isDirectory()) {
|
|
fs.readdirSync(fileOrFolderPath).map((childPath) => {
|
|
recursiveParseSeries(`${fileOrFolderPath}/${childPath}`)
|
|
})
|
|
} else if (stats.isFile()) {
|
|
// skip if it is not a markdown file
|
|
if (!fileOrFolderName.endsWith(".md")) {
|
|
console.log(`Ignoring non markdown file at: ${fileOrFolderPath}`)
|
|
return
|
|
}
|
|
|
|
if (
|
|
!fileOrFolderName.includes("_") &&
|
|
!fileOrFolderName.startsWith("0")
|
|
)
|
|
throw Error(`Invalid series post file name at: ${fileOrFolderPath}`)
|
|
|
|
// parse markdown metadata
|
|
const parsedMarkdown = matter(fs.readFileSync(fileOrFolderPath, "utf8"))
|
|
|
|
if (!parsedMarkdown.data.title) {
|
|
throw Error(`Title is not defined in file: ${fileOrFolderPath}`)
|
|
}
|
|
|
|
if (!parsedMarkdown.data.date) {
|
|
throw Error(`Date is not defined in file: ${fileOrFolderPath}`)
|
|
}
|
|
|
|
// path that will be used as site url
|
|
let urlPath = path2URL(fileOrFolderPath)
|
|
urlPath = urlPath.slice(0, urlPath.lastIndexOf("_"))
|
|
urlPath = urlPath.replace(/\/$/, "") // remove trailing slash
|
|
|
|
// urlPath starts with a slash
|
|
const contentFilePath = `${contentDirectoryPath}${urlPath}.json`
|
|
|
|
// create directory to put json content files
|
|
fs.mkdirSync(
|
|
contentFilePath.slice(0, contentFilePath.lastIndexOf("/")),
|
|
{ recursive: true }
|
|
)
|
|
|
|
// write content to json file
|
|
fs.writeFileSync(
|
|
contentFilePath,
|
|
JSON.stringify({
|
|
content: parsedMarkdown.content.trim(),
|
|
})
|
|
)
|
|
|
|
// Parse data that will be written to map.js
|
|
const postData = {
|
|
title: parsedMarkdown.data.title,
|
|
preview: "",
|
|
date: "",
|
|
tags: [],
|
|
toc: toc(parsedMarkdown.content).content,
|
|
}
|
|
|
|
// content preview
|
|
// parsedMarkdown.excerpt is intentionally not used
|
|
// todo: fix potential improper closing of html tag
|
|
const slicedContent = parsedMarkdown.content.split(" ")
|
|
if (slicedContent.length > 19) {
|
|
postData.preview = slicedContent.slice(0, 19).join(" ") + " ..."
|
|
} else {
|
|
postData.preview = parsedMarkdown.content
|
|
}
|
|
|
|
// date
|
|
const postDate = new Date(parsedMarkdown.data.date)
|
|
postData.date = postDate.toLocaleString("default", {
|
|
month: "short",
|
|
day: "numeric",
|
|
year: "numeric",
|
|
})
|
|
|
|
const YYYY_MM_DD = postDate.toISOString().split("T")[0]
|
|
if (map.date[YYYY_MM_DD]) {
|
|
map.date[YYYY_MM_DD].push(urlPath)
|
|
} else {
|
|
map.date[YYYY_MM_DD] = [urlPath]
|
|
}
|
|
|
|
//tags
|
|
postData.tags = parsedMarkdown.data.tags
|
|
if (postData.tags) {
|
|
postData.tags.forEach((tag) => {
|
|
if (map.tags[tag]) {
|
|
map.tags[tag].push(urlPath)
|
|
} else {
|
|
map.tags[tag] = [urlPath]
|
|
}
|
|
})
|
|
}
|
|
|
|
if (fileOrFolderName.startsWith("0")) {
|
|
map.series[urlPath] = { ...postData, order: [], length: 0 }
|
|
} else {
|
|
map.posts[urlPath] = postData
|
|
index.addDoc({
|
|
title: parsedMarkdown.data.title,
|
|
body: parsedMarkdown.content,
|
|
url: urlPath,
|
|
})
|
|
for (const key of Object.keys(map.series)) {
|
|
if (urlPath.slice(0, urlPath.lastIndexOf("/")).includes(key)) {
|
|
const index = parseInt(
|
|
fileOrFolderName.slice(
|
|
0,
|
|
fileOrFolderName.lastIndexOf("_")
|
|
)
|
|
)
|
|
|
|
if (isNaN(index)) {
|
|
throw Error(
|
|
`Invalid series index at: ${fileOrFolderPath}`
|
|
)
|
|
}
|
|
|
|
const itemToPush = {
|
|
index: index,
|
|
url: urlPath,
|
|
}
|
|
|
|
if (seriesMap[key]) {
|
|
seriesMap[key].push(itemToPush)
|
|
} else {
|
|
seriesMap[key] = [itemToPush]
|
|
}
|
|
break
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Delete existing files
|
|
|
|
try {
|
|
fs.rmSync(contentDirectoryPath, { recursive: true })
|
|
// eslint-disable-next-line no-empty
|
|
} catch (err) {}
|
|
|
|
try {
|
|
fs.unlinkSync(mapFilePath)
|
|
// eslint-disable-next-line no-empty
|
|
} catch (err) {}
|
|
|
|
// check if it's a directory and start recursive parse function
|
|
if (!fs.lstatSync(markdownPath).isDirectory())
|
|
throw Error("Invalid markdown path")
|
|
|
|
if (!fs.lstatSync(markdownPath + "/posts").isDirectory())
|
|
throw Error(`Cannot find directory: ${markdownPath + "/posts"}`)
|
|
|
|
if (!fs.lstatSync(markdownPath + "/unsearchable").isDirectory())
|
|
throw Error(`Cannot find directory: ${markdownPath + "/posts"}`)
|
|
|
|
if (!fs.lstatSync(markdownPath + "/series").isDirectory())
|
|
throw Error(`Cannot find directory: ${markdownPath + "/posts"}`)
|
|
|
|
recursiveParsePosts(markdownPath + "/posts")
|
|
recursiveParseUnsearchable(markdownPath + "/unsearchable")
|
|
recursiveParseSeries(markdownPath + "/series")
|
|
|
|
// sort dates
|
|
let dateKeys: string[] = []
|
|
for (const dateKey in map.date) {
|
|
dateKeys.push(dateKey)
|
|
}
|
|
|
|
dateKeys = dateKeys.sort()
|
|
|
|
const TmpDate = map.date
|
|
map.date = {}
|
|
|
|
dateKeys.forEach((sortedDateKey) => {
|
|
map.date[sortedDateKey] = TmpDate[sortedDateKey]
|
|
})
|
|
|
|
// fill meta data
|
|
for (const tag in map.tags) {
|
|
map.meta.tags.push(tag)
|
|
}
|
|
|
|
// sort series post
|
|
for (const seriesURL in seriesMap) {
|
|
seriesMap[seriesURL].sort((a, b) => {
|
|
if (a.index < b.index) {
|
|
return -1
|
|
}
|
|
if (a.index > b.index) {
|
|
return 1
|
|
}
|
|
return 0
|
|
})
|
|
}
|
|
|
|
for (const seriesURL in seriesMap) {
|
|
map.series[seriesURL].length = seriesMap[seriesURL].length
|
|
map.series[seriesURL].order = seriesMap[seriesURL].map((item) => item.url)
|
|
}
|
|
|
|
fs.writeFileSync(mapFilePath, JSON.stringify(map))
|
|
fs.writeFileSync(outPath + "/search.json", JSON.stringify(index))
|