split markdown parsing code
parent 868b386625
commit 6a43e2e44d
10 changed files with 285 additions and 250 deletions
5
source/generate/config.ts
Normal file
@@ -0,0 +1,5 @@
export const markdownPath = "./markdown" // where it will look for markdown documents
export const outPath = "./src/data" // path to the json database

export const contentDirectoryPath = `${outPath}/content`
export const mapFilePath = `${outPath}/map.json`
99
source/generate/index.ts
Normal file
@@ -0,0 +1,99 @@
/**
 * @file Read markdown files and write their content and metadata to json files which can then be imported by React.
 * - File and directory names starting with an underscore (_) are ignored.
 * - Symbolic links are not supported.
 * - The filename-to-URL converter isn't perfect. Some non-URL-friendly filenames might cause problems.
 * - Series post filenames must start with a number followed by an underscore.
 */

import fs from "fs" // read and write files

import { Map, SeriesMap } from "./types"
import { recursiveParse } from "./recursiveParse"
import { contentDirectoryPath, mapFilePath, markdownPath } from "./config"
import { saveIndex } from "./searchIndex"

// searchable data that will be converted to JSON string
export const map: Map = {
	date: {},
	tags: {},
	meta: {
		tags: [],
	},
	posts: {},
	series: {},
	unsearchable: {},
}
export const seriesMap: SeriesMap = {}

/**
 * Delete existing files
 */

try {
	fs.rmSync(contentDirectoryPath, { recursive: true })
	// eslint-disable-next-line no-empty
} catch (err) {}

try {
	fs.unlinkSync(mapFilePath)
	// eslint-disable-next-line no-empty
} catch (err) {}

// check that each required directory exists, then start the recursive parse
if (!fs.lstatSync(markdownPath).isDirectory())
	throw Error("Invalid markdown path")

if (!fs.lstatSync(markdownPath + "/posts").isDirectory())
	throw Error(`Cannot find directory: ${markdownPath + "/posts"}`)

if (!fs.lstatSync(markdownPath + "/unsearchable").isDirectory())
	throw Error(`Cannot find directory: ${markdownPath + "/unsearchable"}`)

if (!fs.lstatSync(markdownPath + "/series").isDirectory())
	throw Error(`Cannot find directory: ${markdownPath + "/series"}`)

recursiveParse("posts", markdownPath + "/posts")
recursiveParse("unsearchable", markdownPath + "/unsearchable")
recursiveParse("series", markdownPath + "/series")

// sort dates
let dateKeys: string[] = []
for (const dateKey in map.date) {
	dateKeys.push(dateKey)
}

dateKeys = dateKeys.sort()

const TmpDate = map.date
map.date = {}

dateKeys.forEach((sortedDateKey) => {
	map.date[sortedDateKey] = TmpDate[sortedDateKey]
})

// fill meta data
for (const tag in map.tags) {
	map.meta.tags.push(tag)
}

// sort series posts by their index
for (const seriesURL in seriesMap) {
	seriesMap[seriesURL].sort((a, b) => {
		if (a.index < b.index) {
			return -1
		}
		if (a.index > b.index) {
			return 1
		}
		return 0
	})
}

for (const seriesURL in seriesMap) {
	map.series[seriesURL].length = seriesMap[seriesURL].length
	map.series[seriesURL].order = seriesMap[seriesURL].map((item) => item.url)
}

fs.writeFileSync(mapFilePath, JSON.stringify(map))
saveIndex()
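To make the series-ordering step above concrete, here is a standalone sketch (not part of the commit; the series URL and post names are made up) of how seriesMap entries are sorted by index and flattened into the order array:

// Hypothetical data illustrating the sorting logic used above.
const exampleSeriesMap: { [key: string]: { index: number; url: string }[] } = {
	"/series/react-tutorial": [
		{ index: 2, url: "/series/react-tutorial/2_state" },
		{ index: 1, url: "/series/react-tutorial/1_intro" },
	],
}

for (const seriesURL in exampleSeriesMap) {
	// equivalent to the if/else comparator above: ascending by index
	exampleSeriesMap[seriesURL].sort((a, b) => a.index - b.index)
}

const order = exampleSeriesMap["/series/react-tutorial"].map((item) => item.url)
// order: ["/series/react-tutorial/1_intro", "/series/react-tutorial/2_state"]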
33
source/generate/parseMarkdown.ts
Normal file
@@ -0,0 +1,33 @@
import markdownIt from "markdown-it" // rendering markdown
import markdownItTexMath from "markdown-it-texmath" // rendering mathematical expression
import markdownItAnchor from "markdown-it-anchor" // markdown anchor
import hljs from "highlight.js" // code block highlighting
import katex from "katex" // rendering mathematical expression
import { nthIndex } from "./util"

const md = markdownIt({
	// https://github.com/highlightjs/highlight.js/blob/main/SUPPORTED_LANGUAGES.md
	highlight: function (str, lang) {
		if (lang && hljs.getLanguage(lang)) {
			try {
				return hljs.highlight(str, { language: lang }).value
				// eslint-disable-next-line no-empty
			} catch (error) {}
		}

		return "" // use external default escaping
	},
	html: true,
})
	.use(markdownItTexMath, {
		engine: katex,
		delimiters: "dollars",
		katexOptions: { macros: { "\\RR": "\\mathbb{R}" } },
	})
	.use(markdownItAnchor, {})

export function parseMarkdown(markdownRaw: string): string {
	return (
		md.render(markdownRaw.slice(nthIndex(markdownRaw, "---", 2) + 3)) || ""
	)
}
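A quick usage sketch, not part of the diff: parseMarkdown skips everything up to and including the second "---" (the gray-matter front-matter block) and renders only the body. The document below is made up.

import { parseMarkdown } from "./parseMarkdown"

// Hypothetical post: front matter followed by the markdown body.
const raw = [
	"---",
	"title: Hello",
	"date: 2021-01-01",
	"---",
	"",
	"# Heading",
	"",
	"Some *markdown* text.",
].join("\n")

// nthIndex(raw, "---", 2) locates the closing fence, so only the body is rendered.
const html = parseMarkdown(raw)
// html is roughly: <h1 id="heading">Heading</h1> <p>Some <em>markdown</em> text.</p>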
@@ -1,169 +1,18 @@
/**
 * @file Read markdown files and write their content and metadata to json files which can then be imported by React.
 * - File and directory names starting with an underscore (_) are ignored.
 * - Symbolic links are not supported.
 * - The filename-to-URL converter isn't perfect. Some non-URL-friendly filenames might cause problems.
 * - series must start with a number followed by an underscore
 */

import fs from "fs" // read and write files
import path from "path" // get relative path
import elasticlunr from "elasticlunr" // search index generation
import fs from "fs"
import readTimeEstimate from "read-time-estimate" // post read time estimation
import matter from "gray-matter" // parse markdown metadata
import toc from "markdown-toc" // table of contents generation
import markdownIt from "markdown-it" // rendering markdown
import markdownItTexMath from "markdown-it-texmath" // rendering mathematical expression
import markdownItAnchor from "markdown-it-anchor" // markdown anchor
import hljs from "highlight.js" // code block highlighting
import katex from "katex" // rendering mathematical expression
import { JSDOM } from "jsdom" // HTML DOM parsing

const markdownPath = "./markdown" // where it will look for markdown documents
const outPath = "./src/data" // path to the json database
import { nthIndex, path2FileOrFolderName, path2URL, writeToJSON } from "./util"
import { parseMarkdown } from "./parseMarkdown"

const contentDirectoryPath = `${outPath}/content`
const mapFilePath = `${outPath}/map.json`

interface Map {
	// key: YYYY-MM-DD
	// value: url
	date: {
		[key: string]: string[]
	}

	// key: tag name
	// value: url
	tags: {
		[key: string]: string[]
	}

	// list of all meta data
	meta: {
		tags: string[]
	}

	// searchable, non-series posts
	// must have a post date
	// tag is not required
	posts: {
		[key: string]: {
			title: string
			date: string
			tags: string[]
			preview: string
		}
	}

	// series posts have "previous post" and "next post" button so they need to be ordered
	series: {
		[key: string]: {
			title: string
			length: number
			order: string[] // url order
			tags: string[]
		}
	}

	// urls of unsearchable posts
	// it is here to quickly check if a post exists or not
	unsearchable: {
		[key: string]: {
			title: string
		}
	}
}

interface SeriesMap {
	// key: url
	[key: string]: {
		index: number
		url: string
	}[]
}

// searchable data that will be converted to JSON string
const map: Map = {
	date: {},
	tags: {},
	meta: {
		tags: [],
	},
	posts: {},
	series: {},
	unsearchable: {},
}
const seriesMap: SeriesMap = {}
const elasticlunrIndex = elasticlunr(function () {
	this.addField("title" as never)
	this.addField("body" as never)
	this.setRef("url" as never)
})

const md = markdownIt({
	// https://github.com/highlightjs/highlight.js/blob/main/SUPPORTED_LANGUAGES.md
	highlight: function (str, lang) {
		if (lang && hljs.getLanguage(lang)) {
			try {
				return hljs.highlight(str, { language: lang }).value
				// eslint-disable-next-line no-empty
			} catch (error) {}
		}

		return "" // use external default escaping
	},
	html: true,
})
	.use(markdownItTexMath, {
		engine: katex,
		delimiters: "dollars",
		katexOptions: { macros: { "\\RR": "\\mathbb{R}" } },
	})
	.use(markdownItAnchor, {})

// converts file path to url
function path2URL(pathToConvert: string): string {
	return `/${path.relative(markdownPath, pathToConvert)}`
		.replace(/\.[^/.]+$/, "") // remove the file extension
		.replace(/ /g, "-") // replace all space with a dash
}

// gets the text after the last slash
function path2FileOrFolderName(inputPath: string): string {
	// remove trailing slash
	if (inputPath[-1] == "/")
		inputPath = inputPath.slice(0, inputPath.length - 1)

	// get the last section
	return inputPath.slice(inputPath.lastIndexOf("/") + 1)
}

// gets the nth occurance of a pattern in string
// returns -1 if nothing is found
// https://stackoverflow.com/a/14482123/12979111
function nthIndex(str: string, pat: string, n: number) {
	let i = -1

	while (n-- && i++ < str.length) {
		i = str.indexOf(pat, i)
		if (i < 0) break
	}

	return i
}

function writeToJSON(JSONFilePath: string, dataToWrite: string) {
	// create directory to put json content files
	fs.mkdirSync(JSONFilePath.slice(0, JSONFilePath.lastIndexOf("/")), {
		recursive: true,
	})

	// write content to json file
	fs.writeFileSync(JSONFilePath, dataToWrite)
}
import { contentDirectoryPath } from "./config"
import { addDocument } from "./searchIndex"
import { map, seriesMap } from "."

// A recursive function that calls itself for every files and directories that it finds
function recursiveParse(
export function recursiveParse(
	mode: "posts" | "series" | "unsearchable",
	fileOrFolderPath: string
) {

@@ -211,10 +60,7 @@ function recursiveParse(
	if (!markdownData.title)
		throw Error(`Title is not defined in file: ${fileOrFolderPath}`)

	const dom = new JSDOM(
		md.render(markdownRaw.slice(nthIndex(markdownRaw, "---", 2) + 3)) ||
			""
	)
	const dom = new JSDOM(parseMarkdown(markdownRaw))

	// add .hljs to all block codes
	dom.window.document.querySelectorAll("pre > code").forEach((item) => {

@@ -296,7 +142,7 @@ function recursiveParse(
	}

	map.posts[urlPath] = postData
	elasticlunrIndex.addDoc({
	addDocument({
		title: markdownData.title,
		body: markdownData.content,
		url: urlPath,

@@ -322,7 +168,7 @@ function recursiveParse(
		title: markdownData.title,
	}

	elasticlunrIndex.addDoc({
	addDocument({
		title: markdownData.title,
		body: markdownData.content,
		url: urlPath,

@@ -404,7 +250,7 @@ function recursiveParse(
		map.series[urlPath] = { ...postData, order: [], length: 0 }
	} else {
		map.posts[urlPath] = postData
		elasticlunrIndex.addDoc({
		addDocument({
			title: markdownData.title,
			body: markdownData.content,
			url: urlPath,

@@ -443,77 +289,3 @@ function recursiveParse(
		}
	}
}

/**
 * Actual logic starts here
 */

// Delete existing files

try {
	fs.rmSync(contentDirectoryPath, { recursive: true })
	// eslint-disable-next-line no-empty
} catch (err) {}

try {
	fs.unlinkSync(mapFilePath)
	// eslint-disable-next-line no-empty
} catch (err) {}

// check if it's a directory and start recursive parse function
if (!fs.lstatSync(markdownPath).isDirectory())
	throw Error("Invalid markdown path")

if (!fs.lstatSync(markdownPath + "/posts").isDirectory())
	throw Error(`Cannot find directory: ${markdownPath + "/posts"}`)

if (!fs.lstatSync(markdownPath + "/unsearchable").isDirectory())
	throw Error(`Cannot find directory: ${markdownPath + "/posts"}`)

if (!fs.lstatSync(markdownPath + "/series").isDirectory())
	throw Error(`Cannot find directory: ${markdownPath + "/posts"}`)

recursiveParse("posts", markdownPath + "/posts")
recursiveParse("unsearchable", markdownPath + "/unsearchable")
recursiveParse("series", markdownPath + "/series")

// sort dates
let dateKeys: string[] = []
for (const dateKey in map.date) {
	dateKeys.push(dateKey)
}

dateKeys = dateKeys.sort()

const TmpDate = map.date
map.date = {}

dateKeys.forEach((sortedDateKey) => {
	map.date[sortedDateKey] = TmpDate[sortedDateKey]
})

// fill meta data
for (const tag in map.tags) {
	map.meta.tags.push(tag)
}

// sort series post
for (const seriesURL in seriesMap) {
	seriesMap[seriesURL].sort((a, b) => {
		if (a.index < b.index) {
			return -1
		}
		if (a.index > b.index) {
			return 1
		}
		return 0
	})
}

for (const seriesURL in seriesMap) {
	map.series[seriesURL].length = seriesMap[seriesURL].length
	map.series[seriesURL].order = seriesMap[seriesURL].map((item) => item.url)
}

fs.writeFileSync(mapFilePath, JSON.stringify(map))
fs.writeFileSync(outPath + "/search.json", JSON.stringify(elasticlunrIndex))
22
source/generate/searchIndex.ts
Normal file
@@ -0,0 +1,22 @@
/**
 * @file generate index for searching
 */

import fs from "fs"
import elasticlunr from "elasticlunr"

import { outPath } from "./config"

const elasticlunrIndex = elasticlunr(function () {
	this.addField("title" as never)
	this.addField("body" as never)
	this.setRef("url" as never)
})

export function addDocument(doc: unknown) {
	elasticlunrIndex.addDoc(doc)
}

export function saveIndex() {
	fs.writeFileSync(outPath + "/search.json", JSON.stringify(elasticlunrIndex))
}
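Usage sketch (not part of the commit): recursiveParse adds one document per searchable post using the same field names the index is configured with, and index.ts calls saveIndex once at the end. The values below are made up.

import { addDocument, saveIndex } from "./searchIndex"

// "title" and "body" are the searchable fields, "url" is the document reference.
addDocument({
	title: "Hello World",
	body: "Plain-text content of the post.",
	url: "/posts/hello-world",
})

// After every document has been added, serialize the index to `${outPath}/search.json`.
saveIndex()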
56
source/generate/types.ts
Normal file
@@ -0,0 +1,56 @@
export interface Map {
	// key: YYYY-MM-DD
	// value: url
	date: {
		[key: string]: string[]
	}

	// key: tag name
	// value: url
	tags: {
		[key: string]: string[]
	}

	// list of all meta data
	meta: {
		tags: string[]
	}

	// searchable, non-series posts
	// must have a post date
	// tag is not required
	posts: {
		[key: string]: {
			title: string
			date: string
			tags: string[]
			preview: string
		}
	}

	// series posts have "previous post" and "next post" button so they need to be ordered
	series: {
		[key: string]: {
			title: string
			length: number
			order: string[] // url order
			tags: string[]
		}
	}

	// urls of unsearchable posts
	// it is here to quickly check if a post exists or not
	unsearchable: {
		[key: string]: {
			title: string
		}
	}
}

export interface SeriesMap {
	// key: url
	[key: string]: {
		index: number
		url: string
	}[]
}
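For reference, a hand-written example of a value that satisfies the Map interface (all URLs, titles, and dates are hypothetical):

import { Map } from "./types"

const exampleMap: Map = {
	date: { "2021-01-01": ["/posts/hello-world"] },
	tags: { react: ["/posts/hello-world"] },
	meta: { tags: ["react"] },
	posts: {
		"/posts/hello-world": {
			title: "Hello World",
			date: "2021-01-01",
			tags: ["react"],
			preview: "A short preview of the post...",
		},
	},
	series: {
		"/series/react-tutorial": {
			title: "React Tutorial",
			length: 2,
			order: [
				"/series/react-tutorial/1_intro",
				"/series/react-tutorial/2_state",
			],
			tags: ["react"],
		},
	},
	unsearchable: { "/unsearchable/about": { title: "About" } },
}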
45
source/generate/util.ts
Normal file
@@ -0,0 +1,45 @@
import fs from "fs"
import { relative } from "path"

import { markdownPath } from "./config"

// converts file path to url
export function path2URL(pathToConvert: string): string {
	return `/${relative(markdownPath, pathToConvert)}`
		.replace(/\.[^/.]+$/, "") // remove the file extension
		.replace(/ /g, "-") // replace all space with a dash
}

// gets the text after the last slash
export function path2FileOrFolderName(inputPath: string): string {
	// remove the trailing slash, if there is one
	if (inputPath[inputPath.length - 1] == "/")
		inputPath = inputPath.slice(0, inputPath.length - 1)

	// get the last section
	return inputPath.slice(inputPath.lastIndexOf("/") + 1)
}

// gets the nth occurrence of a pattern in a string
// returns -1 if nothing is found
// https://stackoverflow.com/a/14482123/12979111
export function nthIndex(str: string, pat: string, n: number) {
	let i = -1

	while (n-- && i++ < str.length) {
		i = str.indexOf(pat, i)
		if (i < 0) break
	}

	return i
}

export function writeToJSON(JSONFilePath: string, dataToWrite: string) {
	// create directory to put json content files
	fs.mkdirSync(JSONFilePath.slice(0, JSONFilePath.lastIndexOf("/")), {
		recursive: true,
	})

	// write content to json file
	fs.writeFileSync(JSONFilePath, dataToWrite)
}
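A few illustrative calls, assuming markdownPath = "./markdown" and POSIX-style paths (the file names are made up):

import { nthIndex, path2FileOrFolderName, path2URL } from "./util"

// Drop the markdown root and the extension, replace spaces with dashes.
path2URL("./markdown/posts/Hello World.md") // "/posts/Hello-World"

// Text after the last slash.
path2FileOrFolderName("./markdown/posts/Hello World.md") // "Hello World.md"

// Index of the second "---", used by parseMarkdown to find the end of the front matter.
nthIndex("---\ntitle: Hi\n---\nbody", "---", 2) // 14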
@@ -11,7 +11,7 @@
	"private": true,
	"license": "MIT",
	"scripts": {
		"generate": "ts-node -O '{\"module\":\"commonjs\"}' --files ./generate.ts",
		"generate": "ts-node -O '{\"module\":\"commonjs\"}' --files ./generate",
		"start": "yarn generate && react-scripts start",
		"quick-start": "react-scripts start",
		"build": "yarn generate && react-scripts build"
@@ -1,15 +1,17 @@
/** PostList.tsx
 * show posts in recent order
/**
 * PostList.tsx
 * show posts in recent order
 */

import React, { useEffect, useState } from "react"
import styled from "styled-components"
import { useEffect, useState } from "react"
import { Helmet } from "react-helmet-async"

import theming from "../styles/theming"
import _map from "../data/map.json"
import styled from "styled-components"

import PostCard from "../components/PostCard"

import _map from "../data/map.json"
import theming from "../styles/theming"

import { Map } from "../types/typings"

const map: Map = _map

@@ -31,16 +33,17 @@ interface Props {

const PostList = (props: Props) => {
	const howMany = props.howMany || 0
	const [postCards, setPostCards] = useState([] as unknown[])
	const [postCards, setPostCards] = useState<JSX.Element[]>([])

	useEffect(() => {
		let postCount = 0
		const _postCards = [] as unknown[]
		const _postCards = [] as JSX.Element[]

		for (const date in map.date) {
			if (postCount >= howMany) break

			const length = map.date[date].length

			for (let i = 0; i < length; i++) {
				if (postCount >= howMany) break
@@ -17,5 +17,5 @@
		"noEmit": true,
		"jsx": "react-jsx"
	},
	"include": ["src/**/*", "types/**/*", "generate.ts"]
	"include": ["src/**/*", "types/**/*", "generate/**/*"]
}