pompy.dev/source/generate.ts

505 lines
13 KiB
TypeScript

/**
* Read markdown files and write their content and metadata to json files which can then be imported by React.
* - File and directory names starting with an underscore (_) get ignored.
* - Symbolic links are not supported.
* - The Filename-to-URL encoder is not perfect. Some non-URL-friendly filenames might cause problems.
* - Series post filenames must start with a number followed by an underscore (e.g. "1_intro.md"); "0"-prefixed files act as the series head document.
*/
import fs from "fs" // read and write files
import path from "path" // get relative path
import elasticlunr from "elasticlunr" // search index generation
import readTimeEstimate from "read-time-estimate" // post read time estimation
import matter from "gray-matter" // parse markdown metadata
import toc from "markdown-toc" // table of contents generation
import markdownIt from "markdown-it" // rendering markdown
import markdownItTexMath from "markdown-it-texmath" // rendering mathematical expression
import markdownItAnchor from "markdown-it-anchor" // markdown anchor
import hljs from "highlight.js" // code block highlighting
import katex from "katex" // rendering mathematical expression
// Input/output locations. Paths are relative to the process working
// directory, so this script must be run from the project root.
const markdownPath = "./markdown" // where it will look for markdown documents
const outPath = "./src/data" // path to the json database
const contentDirectoryPath = `${outPath}/content` // one rendered-content json per post
const mapFilePath = `${outPath}/map.json` // single site-wide index file
// Shape of the site-wide index written to map.json.
// NOTE(review): this name shadows the built-in `Map` type within this module.
interface Map {
    // key: YYYY-MM-DD
    // value: urls of posts published on that date
    date: {
        [key: string]: string[]
    }
    // key: tag name
    // value: urls of posts carrying that tag
    tags: {
        [key: string]: string[]
    }
    // aggregated metadata (currently just the list of all tags in use)
    meta: {
        tags: string[]
    }
    // searchable, non-series posts
    // must have a post date
    // tag is not required
    posts: {
        [key: string]: {
            title: string
            date: string
            tags: string[]
            preview: string
        }
    }
    // series posts have "previous post" and "next post" button so they need to be ordered
    series: {
        [key: string]: {
            title: string
            length: number
            order: string[] // url order
            tags: string[]
        }
    }
    // urls of unsearchable posts
    // it is here to quickly check if a post exists or not
    unsearchable: {
        [key: string]: {
            title: string
        }
    }
}
// Intermediate structure that collects series members during parsing;
// consumed at the bottom of this file to fill map.series order/length.
interface SeriesMap {
    // key: series url
    // value: member posts, later sorted by their numeric filename index
    [key: string]: {
        index: number // number parsed from the "N_" filename prefix
        url: string
    }[]
}
// searchable data that will be converted to JSON string
const map: Map = {
    date: {},
    tags: {},
    meta: {
        tags: [],
    },
    posts: {},
    series: {},
    unsearchable: {},
}
// accumulates series members during parsing; read back at end of file
const seriesMap: SeriesMap = {}
// full-text search index; serialized to search.json at the end
const elasticlunrIndex = elasticlunr(function () {
    // NOTE(review): "as never" silences the elasticlunr typings, which do
    // not accept plain strings here — presumably a typings limitation
    this.addField("title" as never)
    this.addField("body" as never)
    this.setRef("url" as never)
})
// markdown renderer shared by all parse modes
const md = markdownIt({
    // syntax-highlight fenced code blocks with highlight.js when the
    // language is recognized; otherwise fall back to default escaping
    highlight: function (str, lang) {
        if (lang && hljs.getLanguage(lang)) {
            try {
                return hljs.highlight(str, { language: lang }).value
                // eslint-disable-next-line no-empty
            } catch (error) {}
        }
        return "" // use external default escaping
    },
    html: true, // allow raw html inside markdown
})
    .use(markdownItTexMath, {
        engine: katex, // render math expressions with katex
        delimiters: "dollars", // $...$ and $$...$$
        katexOptions: { macros: { "\\RR": "\\mathbb{R}" } },
    })
    .use(markdownItAnchor, {}) // add ids/anchor links to headings
// Maps a markdown file path to the site url it will be served at:
// "/" + the path relative to the markdown root, minus the file
// extension, with every space converted to a dash.
function path2URL(pathToConvert: string): string {
    const relativePath = path.relative(markdownPath, pathToConvert)
    const withoutExtension = `/${relativePath}`.replace(/\.[^/.]+$/, "")
    return withoutExtension.replace(/ /g, "-")
}
// Returns the last path segment (the text after the final slash).
// A single trailing slash is stripped first, so "a/b/" yields "b".
function path2FileOrFolderName(inputPath: string): string {
    // remove trailing slash
    // (the previous check `inputPath[-1] == "/"` never matched because
    // negative string indices are undefined in JavaScript)
    if (inputPath.endsWith("/"))
        inputPath = inputPath.slice(0, inputPath.length - 1)
    // get the last section
    return inputPath.slice(inputPath.lastIndexOf("/") + 1)
}
// Gets the index of the nth occurrence of a pattern in a string.
// Returns -1 when there are fewer than n occurrences.
// https://stackoverflow.com/a/14482123/12979111
function nthIndex(str: string, pat: string, n: number) {
    let pos = -1
    for (let remaining = n; remaining > 0; remaining--) {
        pos = str.indexOf(pat, pos + 1)
        if (pos < 0) return -1
    }
    return pos
}
// Writes a JSON string to disk, creating parent directories as needed.
function writeToJSON(JSONFilePath: string, dataToWrite: string) {
    // create directory to put json content files
    // path.dirname is used instead of slicing at lastIndexOf("/"):
    // the slice approach returned slice(0, -1) for slash-less paths,
    // silently chopping the last character off the path
    fs.mkdirSync(path.dirname(JSONFilePath), {
        recursive: true,
    })
    // write content to json file
    fs.writeFileSync(JSONFilePath, dataToWrite)
}
// A recursive function that calls itself for every files and directories that it finds.
// `mode` selects which top-level markdown directory is being walked and
// therefore how each file's url and map entry are produced. Mutates the
// module-level `map`, `seriesMap`, and `elasticlunrIndex`, and writes one
// rendered-content json file per markdown post.
function recursiveParse(
    mode: "posts" | "series" | "unsearchable",
    fileOrFolderPath: string
) {
    if (mode == "unsearchable") {
        // illegal names
        // "posts"/"series" directly under unsearchable would collide with
        // the other two top-level url namespaces
        if (
            fileOrFolderPath == "./markdown/unsearchable/posts" ||
            fileOrFolderPath == "./markdown/unsearchable/series"
        )
            throw Error(
                `Illegal name (posts/series) in path: "${fileOrFolderPath}".`
            )
    }
    // get string after the last slash character
    const fileOrFolderName = path2FileOrFolderName(fileOrFolderPath)
    // ignore if file or directory name starts with a underscore
    if (fileOrFolderName.startsWith("_")) return
    // get data about the given path
    // (lstat, not stat: symbolic links are intentionally not followed,
    // matching the "symbolic links are not supported" note at the top)
    const stats = fs.lstatSync(fileOrFolderPath)
    // if it's a directory, call this function to every files/directories in it
    // if it's a file, parse it and then save it to file
    if (stats.isDirectory()) {
        fs.readdirSync(fileOrFolderPath).map((childPath) => {
            recursiveParse(mode, `${fileOrFolderPath}/${childPath}`)
        })
    } else if (stats.isFile()) {
        // skip if it is not a markdown file
        if (!fileOrFolderName.endsWith(".md")) {
            console.log(`Ignoring non markdown file at: ${fileOrFolderPath}`)
            return
        }
        // read markdown file
        const markdownRaw = fs.readFileSync(fileOrFolderPath, "utf8")
        // parse markdown metadata
        // only the front matter (everything up to and including the second
        // "---") is handed to gray-matter; the rest is rendered below
        const markdownData = matter(
            markdownRaw.slice(0, nthIndex(markdownRaw, "---", 2) + 3)
        ).data
        if (!markdownData.title)
            throw Error(`Title is not defined in file: ${fileOrFolderPath}`)
        // render the post body (everything after the front matter) to html
        markdownData.content =
            md.render(markdownRaw.slice(nthIndex(markdownRaw, "---", 2) + 3)) ||
            ""
        // https://github.com/pritishvaidya/read-time-estimate
        // NOTE(review): 275/12/500 are presumably words-per-minute,
        // seconds-per-image, and cjk chars-per-minute — confirm against
        // the read-time-estimate docs
        const { humanizedDuration, totalWords } = readTimeEstimate(
            markdownData.content,
            275,
            12,
            500,
            ["img", "Image"]
        )
        if (mode == "posts") {
            if (!markdownData.date) {
                throw Error(`Date is not defined in file: ${fileOrFolderPath}`)
            }
            // path that will be used as site url (starts with a slash)
            const urlPath = path2URL(fileOrFolderPath)
            // write rendered html + table of contents for this post
            writeToJSON(
                `${contentDirectoryPath}${urlPath}.json`,
                JSON.stringify({
                    content: markdownData.content,
                    toc: toc(markdownRaw).json,
                })
            )
            // Parse data that will be written to map.js
            const postData = {
                title: markdownData.title,
                preview: "",
                date: "",
                readTime: humanizedDuration,
                wordCount: totalWords,
                tags: [],
            }
            // content preview: first 19 space-separated chunks of the
            // rendered html, or the whole content when shorter
            // parsedMarkdown.excerpt is intentionally not used
            // todo: fix potential improper closing of html tag
            const slicedContent = markdownData.content.split(" ")
            if (slicedContent.length > 19) {
                postData.preview = slicedContent.slice(0, 19).join(" ") + " ..."
            } else {
                postData.preview = markdownData.content
            }
            // date: human-readable form for display...
            const postDate = new Date(markdownData.date)
            postData.date = postDate.toLocaleString("default", {
                month: "short",
                day: "numeric",
                year: "numeric",
            })
            // ...and a YYYY-MM-DD key for the date index
            const YYYY_MM_DD = postDate.toISOString().split("T")[0]
            if (map.date[YYYY_MM_DD]) {
                map.date[YYYY_MM_DD].push(urlPath)
            } else {
                map.date[YYYY_MM_DD] = [urlPath]
            }
            //tags (optional in front matter)
            postData.tags = markdownData.tags
            if (postData.tags) {
                postData.tags.forEach((tag) => {
                    if (map.tags[tag]) {
                        map.tags[tag].push(urlPath)
                    } else {
                        map.tags[tag] = [urlPath]
                    }
                })
            }
            map.posts[urlPath] = postData
            // make the post full-text searchable
            elasticlunrIndex.addDoc({
                title: markdownData.title,
                body: markdownData.content,
                url: urlPath,
            })
        } else if (mode == "unsearchable") {
            // path that will be used as site url (starts with a slash)
            const _urlPath = path2URL(fileOrFolderPath)
            // strip the leading "/unsearchable" segment: keep everything
            // from the second slash onwards
            const urlPath = _urlPath.slice(
                _urlPath
                    .slice(1) // ignore the first slash
                    .indexOf("/") + 1
            )
            writeToJSON(
                `${contentDirectoryPath}/unsearchable${urlPath}.json`,
                JSON.stringify({
                    content: markdownData.content,
                })
            )
            // Parse data that will be written to map.js
            map.unsearchable[urlPath] = {
                title: markdownData.title,
            }
            // NOTE(review): despite the mode name, these documents are
            // still added to the search index — confirm this is intended
            elasticlunrIndex.addDoc({
                title: markdownData.title,
                body: markdownData.content,
                url: urlPath,
            })
        } else if (mode == "series") {
            // series file names must contain an "N_" ordering prefix, or
            // start with "0" (the series head document)
            if (
                !fileOrFolderName.includes("_") &&
                !fileOrFolderName.startsWith("0")
            )
                throw Error(
                    `Invalid series post file name at: ${fileOrFolderPath}`
                )
            if (!markdownData.date) {
                throw Error(`Date is not defined in file: ${fileOrFolderPath}`)
            }
            // path that will be used as site url (starts with a slash);
            // the "N_" ordering prefix is removed from the url
            let urlPath = path2URL(fileOrFolderPath)
            urlPath = urlPath.slice(0, urlPath.lastIndexOf("_"))
            urlPath = urlPath.replace(/\/$/, "") // remove trailing slash
            writeToJSON(
                `${contentDirectoryPath}${urlPath}.json`,
                JSON.stringify({
                    content: markdownData.content,
                    toc: toc(markdownRaw).json,
                })
            )
            // Parse data that will be written to map.js
            // NOTE(review): the preview/date/tags logic below duplicates
            // the "posts" branch above — candidate for a shared helper
            const postData = {
                title: markdownData.title,
                preview: "",
                date: "",
                readTime: humanizedDuration,
                wordCount: totalWords,
                tags: [],
            }
            // content preview
            // parsedMarkdown.excerpt is intentionally not used
            // todo: fix potential improper closing of html tag
            const slicedContent = markdownData.content.split(" ")
            if (slicedContent.length > 19) {
                postData.preview = slicedContent.slice(0, 19).join(" ") + " ..."
            } else {
                postData.preview = markdownData.content
            }
            // date
            const postDate = new Date(markdownData.date)
            postData.date = postDate.toLocaleString("default", {
                month: "short",
                day: "numeric",
                year: "numeric",
            })
            const YYYY_MM_DD = postDate.toISOString().split("T")[0]
            if (map.date[YYYY_MM_DD]) {
                map.date[YYYY_MM_DD].push(urlPath)
            } else {
                map.date[YYYY_MM_DD] = [urlPath]
            }
            //tags
            postData.tags = markdownData.tags
            if (postData.tags) {
                postData.tags.forEach((tag) => {
                    if (map.tags[tag]) {
                        map.tags[tag].push(urlPath)
                    } else {
                        map.tags[tag] = [urlPath]
                    }
                })
            }
            if (fileOrFolderName.startsWith("0")) {
                // series head document: becomes the series entry itself;
                // order/length are filled in after all parsing is done.
                // NOTE(review): this relies on the head file ("0...") being
                // visited before its members, i.e. on readdirSync's sorted
                // order — verify this assumption holds
                map.series[urlPath] = { ...postData, order: [], length: 0 }
            } else {
                // regular series member: searchable like a normal post
                map.posts[urlPath] = postData
                elasticlunrIndex.addDoc({
                    title: markdownData.title,
                    body: markdownData.content,
                    url: urlPath,
                })
                // register this post under its parent series so it can be
                // ordered later
                for (const key of Object.keys(map.series)) {
                    // NOTE(review): includes() could match an unrelated
                    // series whose url is a substring of this post's parent
                    // path — confirm startsWith/equality wouldn't be safer
                    if (
                        urlPath.slice(0, urlPath.lastIndexOf("/")).includes(key)
                    ) {
                        // ordering index parsed from the "N_" file prefix
                        const index = parseInt(
                            fileOrFolderName.slice(
                                0,
                                fileOrFolderName.lastIndexOf("_")
                            )
                        )
                        if (isNaN(index)) {
                            throw Error(
                                `Invalid series index at: ${fileOrFolderPath}`
                            )
                        }
                        const itemToPush = {
                            index: index,
                            url: urlPath,
                        }
                        if (seriesMap[key]) {
                            seriesMap[key].push(itemToPush)
                        } else {
                            seriesMap[key] = [itemToPush]
                        }
                        break
                    }
                }
            }
        }
    }
}
// Delete existing files
try {
fs.rmSync(contentDirectoryPath, { recursive: true })
// eslint-disable-next-line no-empty
} catch (err) {}
try {
fs.unlinkSync(mapFilePath)
// eslint-disable-next-line no-empty
} catch (err) {}
// check if it's a directory and start recursive parse function
if (!fs.lstatSync(markdownPath).isDirectory())
throw Error("Invalid markdown path")
if (!fs.lstatSync(markdownPath + "/posts").isDirectory())
throw Error(`Cannot find directory: ${markdownPath + "/posts"}`)
if (!fs.lstatSync(markdownPath + "/unsearchable").isDirectory())
throw Error(`Cannot find directory: ${markdownPath + "/posts"}`)
if (!fs.lstatSync(markdownPath + "/series").isDirectory())
throw Error(`Cannot find directory: ${markdownPath + "/posts"}`)
recursiveParse("posts", markdownPath + "/posts")
recursiveParse("unsearchable", markdownPath + "/unsearchable")
recursiveParse("series", markdownPath + "/series")
// sort dates
let dateKeys: string[] = []
for (const dateKey in map.date) {
dateKeys.push(dateKey)
}
dateKeys = dateKeys.sort()
const TmpDate = map.date
map.date = {}
dateKeys.forEach((sortedDateKey) => {
map.date[sortedDateKey] = TmpDate[sortedDateKey]
})
// fill meta data
for (const tag in map.tags) {
map.meta.tags.push(tag)
}
// sort series post
for (const seriesURL in seriesMap) {
seriesMap[seriesURL].sort((a, b) => {
if (a.index < b.index) {
return -1
}
if (a.index > b.index) {
return 1
}
return 0
})
}
for (const seriesURL in seriesMap) {
map.series[seriesURL].length = seriesMap[seriesURL].length
map.series[seriesURL].order = seriesMap[seriesURL].map((item) => item.url)
}
fs.writeFileSync(mapFilePath, JSON.stringify(map))
fs.writeFileSync(outPath + "/search.json", JSON.stringify(elasticlunrIndex))