import fs from "fs"
import readTimeEstimate from "read-time-estimate" // post read time estimation
import matter from "gray-matter" // parse markdown metadata
import { JSDOM } from "jsdom" // HTML DOM parsing

import { nthIndex, path2FileOrFolderName, path2URL, writeToJSON } from "./util"
import parseMarkdown, { generateToc } from "./parseMarkdown"

import { contentDirectoryPath } from "./config"
import { addDocument } from "./searchIndex"
import { map, seriesMap } from "."

import { MarkdownData, ParseMode, PostData } from "../types/typing"

// paths that are rejected (with an error) when parsing in unsearchable mode
const illegalPaths = [
  "./markdown/unsearchable/posts",
  "./markdown/unsearchable/series",
]

/**
 * Everything the mode-specific parsers need to know about a single markdown file.
 */
interface DataToPass {
  // path of the markdown file as it was handed to recursiveParse
  path: string
  // result of path2URL(path); parseFile rewrites it for unsearchable and series content
  urlPath: string
  fileOrFolderName: string
  // unmodified file content
  markdownRaw: string
  // front matter plus rendered content, as returned by parseFrontMatter
  markdownData: MarkdownData
  // read time estimation results from read-time-estimate
  humanizedDuration: string
  totalWords: number
}

/**
 * A recursive function that calls itself for every file and directory that it finds.
 *
 * Entries whose name starts with "_" are skipped. Directories are descended into,
 * regular files are handed to parseFile, anything else is ignored.
 *
 * @param {ParseMode} mode
 * @param {string} path - path of file or folder
 *
 * @returns {void}
 *
 * @throws {Error} if mode is UNSEARCHABLE and path is one of the illegal paths
 */
export function recursiveParse(mode: ParseMode, path: string): void {
  // don't parse specific directories when parsing unsearchable content
  if (mode === ParseMode.UNSEARCHABLE && illegalPaths.includes(path)) {
    throw new Error(`Illegal name (posts/series) in path: "${path}".`)
  }

  const fileOrFolderName = path2FileOrFolderName(path)
  if (fileOrFolderName.startsWith("_")) return

  // lstat does not follow symbolic links, so links match neither branch below
  const stats = fs.lstatSync(path)

  if (stats.isDirectory()) {
    // visit every child of the directory
    for (const childName of fs.readdirSync(path)) {
      recursiveParse(mode, `${path}/${childName}`)
    }
  } else if (stats.isFile()) {
    parseFile(mode, path, fileOrFolderName)
  }
}

/**
 * Reads a single markdown file, gathers the data shared by every parse mode
 * and dispatches to the parser that belongs to the given mode.
 *
 * Files whose name does not end with ".md" are logged and skipped.
 *
 * @param {ParseMode} mode
 * @param {string} path - path of the file
 * @param {string} fileOrFolderName - name of the file, as returned by path2FileOrFolderName
 *
 * @returns {void}
 */
function parseFile(
  mode: ParseMode,
  path: string,
  fileOrFolderName: string
): void {
  // skip if it is not a markdown file
  if (!fileOrFolderName.endsWith(".md")) {
    console.log(`Ignoring non markdown file at: ${path}`)
    return
  }

  const markdownRaw = fs.readFileSync(path, "utf8")
  const markdownData: MarkdownData = parseFrontMatter(markdownRaw, path, mode)

  // https://github.com/pritishvaidya/read-time-estimate
  // NOTE(review): the numeric arguments presumably are words per minute, seconds
  // per image and CJK characters per minute, followed by the image tag names -
  // confirm against the library documentation linked above
  const { humanizedDuration, totalWords } = readTimeEstimate(
    markdownData.content,
    275,
    12,
    500,
    ["img", "Image"]
  )

  const dataToPass: DataToPass = {
    path,
    urlPath: path2URL(path),
    fileOrFolderName,
    markdownRaw,
    markdownData,
    humanizedDuration,
    totalWords,
  }

  switch (mode) {
    case ParseMode.POSTS: {
      parsePost(dataToPass)
      break
    }

    case ParseMode.UNSEARCHABLE: {
      // drop the first URL segment: keep everything from the second slash onward
      // (the whole URL is kept when there is no second slash)
      const secondSlash =
        dataToPass.urlPath
          .slice(1) // ignore the first slash
          .indexOf("/") + 1

      dataToPass.urlPath = dataToPass.urlPath.slice(secondSlash)
      parseUnsearchable(dataToPass)
      break
    }

    case ParseMode.SERIES: {
      // cut the URL at its last underscore
      // NOTE(review): when the URL has no underscore lastIndexOf returns -1 and
      // slice(0, -1) drops the last character instead - looks like the series
      // descriptor relies on this; confirm before changing
      const trimmed = dataToPass.urlPath.slice(
        0,
        dataToPass.urlPath.lastIndexOf("_")
      )

      dataToPass.urlPath = trimmed.replace(/\/$/, "") // remove trailing slash
      parseSeries(dataToPass)
      break
    }
  }
}

/**
 * Registers a regular post: records it in the date, tag and post maps, adds it
 * to the search index and writes its content and table of contents to a JSON file.
 *
 * @param {DataToPass} data
 *
 * @returns {void}
 *
 * @throws {Error} if the date in the front matter can not be parsed
 */
function parsePost(data: DataToPass): void {
  const {
    path,
    urlPath,
    markdownRaw,
    markdownData,
    humanizedDuration,
    totalWords,
  } = data

  const postData: PostData = {
    title: markdownData.title,
    date: "",
    readTime: humanizedDuration,
    wordCount: totalWords,
    tags: [],
  }

  /**
   * Dates
   */

  const postDate = new Date(markdownData.date)

  // toISOString throws a context-free RangeError on an invalid date,
  // so fail early with a message that names the offending file
  if (isNaN(postDate.getTime()))
    throw new Error(`Invalid date "${markdownData.date}" in file: ${path}`)

  postData.date = postDate.toLocaleString("default", {
    month: "short",
    day: "numeric",
    year: "numeric",
  })

  // the date part of the ISO string is used as the key of the date map
  const YYYY_MM_DD = postDate.toISOString().split("T")[0]
  if (map.date[YYYY_MM_DD]) {
    map.date[YYYY_MM_DD].push(urlPath)
  } else {
    map.date[YYYY_MM_DD] = [urlPath]
  }

  /**
   * Tags
   */

  postData.tags = markdownData.tags

  // tags may be missing from the front matter
  if (postData.tags) {
    for (const tag of postData.tags) {
      if (map.tags[tag]) {
        map.tags[tag].push(urlPath)
      } else {
        map.tags[tag] = [urlPath]
      }
    }
  }

  /**
   * Post map, search index and content file
   */

  map.posts[urlPath] = postData

  addDocument({
    title: markdownData.title,
    body: markdownData.content,
    url: urlPath,
  })

  writeToJSON(
    `${contentDirectoryPath}${urlPath}.json`,
    JSON.stringify({
      content: markdownData.content,
      toc: generateToc(markdownRaw),
    })
  )
}
/**
 * Registers a markdown file that belongs to a series.
 *
 * A file whose name starts with "0" describes the series itself and is stored in
 * map.series. Every other file is a post of a series: it is stored in map.posts,
 * added to the search index and queued in seriesMap under the series it belongs to.
 * In both cases the date and tag maps are updated and the content is written to
 * a JSON file.
 *
 * @param {DataToPass} data
 *
 * @returns {void}
 *
 * @throws {Error} if the file name is invalid, the date can not be parsed
 *                 or the series index is not a number
 */
function parseSeries(data: DataToPass): void {
  const {
    path,
    urlPath,
    fileOrFolderName,
    markdownRaw,
    markdownData,
    humanizedDuration,
    totalWords,
  } = data

  // a valid name either contains an underscore or starts with "0"
  if (!fileOrFolderName.includes("_") && !fileOrFolderName.startsWith("0"))
    throw new Error(`Invalid series post file name at: ${path}`)

  const postData: PostData = {
    title: markdownData.title,
    date: "",
    readTime: humanizedDuration,
    wordCount: totalWords,
    tags: [],
  }

  /**
   * Date
   */

  const postDate = new Date(markdownData.date)

  // toISOString throws a context-free RangeError on an invalid date,
  // so fail early with a message that names the offending file
  if (isNaN(postDate.getTime()))
    throw new Error(`Invalid date "${markdownData.date}" in file: ${path}`)

  postData.date = postDate.toLocaleString("default", {
    month: "short",
    day: "numeric",
    year: "numeric",
  })

  // the date part of the ISO string is used as the key of the date map
  const YYYY_MM_DD = postDate.toISOString().split("T")[0]
  if (map.date[YYYY_MM_DD]) {
    map.date[YYYY_MM_DD].push(urlPath)
  } else {
    map.date[YYYY_MM_DD] = [urlPath]
  }

  /**
   * Tags
   */

  postData.tags = markdownData.tags

  // tags may be missing from the front matter
  if (postData.tags) {
    for (const tag of postData.tags) {
      if (map.tags[tag]) {
        map.tags[tag].push(urlPath)
      } else {
        map.tags[tag] = [urlPath]
      }
    }
  }

  /**
   * Series descriptor or series post
   */

  // series markdown starting with 0 is a series descriptor
  if (fileOrFolderName.startsWith("0")) {
    map.series[urlPath] = {
      ...postData,
      order: [],
      length: 0,
    }
  } else {
    addDocument({
      title: markdownData.title,
      body: markdownData.content,
      url: urlPath,
    })

    map.posts[urlPath] = postData

    // URL of the post without its last segment
    const parentUrl = urlPath.slice(0, urlPath.lastIndexOf("/"))

    // attach the post to the first already-registered series whose URL is
    // contained in the parent URL
    // NOTE(review): this is a substring match, so a series URL that is a prefix
    // of another one could match the wrong series - confirm URLs are distinct
    for (const key of Object.keys(map.series)) {
      if (!parentUrl.includes(key)) continue

      // the part of the file name before the last underscore is the series index;
      // explicit radix so that a prefix like "0x10" is not read as hexadecimal
      const index = parseInt(
        fileOrFolderName.slice(0, fileOrFolderName.lastIndexOf("_")),
        10
      )

      if (isNaN(index)) throw new Error(`Invalid series index at: ${path}`)

      const itemToPush = {
        index: index,
        url: urlPath,
      }

      if (seriesMap[key]) {
        seriesMap[key].push(itemToPush)
      } else {
        seriesMap[key] = [itemToPush]
      }

      break
    }
  }

  /**
   * Content file
   */

  writeToJSON(
    `${contentDirectoryPath}${urlPath}.json`,
    JSON.stringify({
      content: markdownData.content,
      toc: generateToc(markdownRaw),
    })
  )
}

/**
 * Registers a page from the unsearchable content directory: adds it to the
 * search index, records its title in map.unsearchable and writes its content
 * to a JSON file under the "unsearchable" directory.
 *
 * @param {DataToPass} data
 *
 * @returns {void}
 */
function parseUnsearchable(data: DataToPass): void {
  const { urlPath, markdownData } = data

  addDocument({
    title: markdownData.title,
    body: markdownData.content,
    url: urlPath,
  })

  // Parse data that will be written to map.js
  map.unsearchable[urlPath] = {
    title: markdownData.title,
  }

  writeToJSON(
    `${contentDirectoryPath}/unsearchable${urlPath}.json`,
    JSON.stringify({
      content: markdownData.content,
    })
  )
}

/**
 * Extracts the front matter of a markdown file, validates it and attaches the
 * rendered HTML of the file as `content`.
 *
 * todo: accurately calculate start and end of front matter
 *
 * @param {string} markdownRaw
 * @param {string} path - only used in error messages
 * @param {ParseMode} mode - a date is required in every mode except UNSEARCHABLE
 *
 * @returns {MarkdownData}
 *
 * @throws {Error} if the title, or the date where it is required, is missing
 */
function parseFrontMatter(
  markdownRaw: string,
  path: string,
  mode: ParseMode
): MarkdownData {
  // only the text up to and including the delimiter located by nthIndex is
  // handed to gray-matter
  // NOTE(review): presumably nthIndex finds the second "---", i.e. the closing
  // delimiter of the front matter - confirm against ./util
  const frontMatterEnd = nthIndex(markdownRaw, "---", 2) + 3
  const result = matter(markdownRaw.slice(0, frontMatterEnd)).data

  if (!result.title) throw new Error(`Title is not defined in file: ${path}`)

  if (mode !== ParseMode.UNSEARCHABLE && !result.date)
    throw new Error(`Date is not defined in file: ${path}`)

  const dom = new JSDOM(parseMarkdown(markdownRaw))

  // add .hljs class to all block codes
  dom.window.document.querySelectorAll("pre > code").forEach((item) => {
    item.classList.add("hljs")
  })

  result.content = dom.window.document.documentElement.innerHTML

  return result as MarkdownData
}