/**
 * Lexes a small Markdown dialect into a flat array of token objects.
 *
 * Token shapes produced:
 *   - { type: "span" | "em" | "bold" | "embold", content }
 *   - { type: "header", level, content }
 *   - { type: "image", alt, url, title }
 *   - { type: "link", link, text }
 *   - { type: "ul" | "quote", items }   (each item is itself a token array)
 *   - { type: "inlinecode" | "codeblock", content, language }
 *   - { type: "hline" | "newline" | "toc" | "title" }
 *
 * @param {string} markdown - Source text to lex.
 * @param {boolean} [newline] - Whether lexing starts at the beginning of a line;
 *     `null`/`undefined` means `true`. List items are lexed recursively with `false`.
 * @returns {Array<object>} Tokens in source order.
 */
const md2tokens = (markdown, newline) => {
    const EMPHASIS_TYPES = ["em", "bold", "embold"];

    let i = 0;
    let current_string = "";
    let at_line_start = newline == null ? true : newline;
    let newline_count = at_line_start ? 1 : 0;
    const tokens = [];

    // Consumes the current character when it equals `c`.
    const match = (c) => {
        if (markdown[i] === c) {
            i++;
            return true;
        }
        return false;
    };

    const peek = (offset) => markdown[i + offset];

    // Consumes characters until `should_stop(index)` holds or the input ends.
    const capture_until_predicate = (should_stop) => {
        const start = i;
        while (i < markdown.length && !should_stop(i)) {
            i++;
        }
        return markdown.slice(start, i);
    };

    const capture_until = (c) => capture_until_predicate((k) => markdown[k] === c);

    // Consumes `word` when the input continues with it at the cursor.
    const match_word = (word) => {
        if (!markdown.startsWith(word, i)) return false;
        i += word.length;
        return true;
    };

    // Flushes pending text as a token; whitespace-only text is discarded.
    const finish = (type) => {
        if (current_string.trim().length > 0) {
            tokens.push({ type: type == null ? "span" : type, content: current_string });
        }
        current_string = "";
    };

    const mark_inline = () => {
        at_line_start = false;
        newline_count = 0;
    };

    const append_literal = (text) => {
        current_string += text;
        mark_inline();
    };

    // Some constructs leave the cursor one past their last character, and the main
    // loop then increments once more. A line break in that position is swallowed on
    // purpose (it only terminates the construct); anything else is real content, so
    // the cursor steps back to let the loop lex it instead of dropping it.
    const rewind_unless_newline = () => {
        if (markdown[i] !== "\n") i--;
    };

    // "\toc" / "\title" directives, otherwise a backslash escape.
    const lex_escape = () => {
        i++;
        if (match_word("toc")) {
            tokens.push({ type: "toc" });
            rewind_unless_newline();
            return;
        }
        if (match_word("title")) {
            tokens.push({ type: "title" });
            rewind_unless_newline();
            return;
        }
        // A backslash that ends the input has nothing to escape: keep it literally.
        current_string += i < markdown.length ? markdown[i] : "\\";
    };

    // *em*, **bold**, ***embold***; an unterminated run extends to the end of input.
    const lex_emphasis = () => {
        finish();
        i++;
        mark_inline();
        let run_length = 1;
        while (run_length < EMPHASIS_TYPES.length && match("*")) {
            run_length++;
        }
        const closing = "*".repeat(run_length);
        current_string = capture_until_predicate((k) => markdown.startsWith(closing, k));
        // Stop on the last closing '*'; the main loop steps past it.
        i += run_length - 1;
        finish(EMPHASIS_TYPES[run_length - 1]);
    };

    // "# text" at the start of a line; a '#' elsewhere is plain text.
    const lex_header = () => {
        if (!at_line_start) {
            append_literal(markdown[i]);
            return;
        }
        finish();
        // NOTE(review): level ends up one MORE than the number of '#' characters
        // ("# x" -> 2). Kept unchanged because the code that consumes `level` could
        // not be checked from here; confirm whether this offset is intended.
        let level = 1;
        while (match("#")) {
            level++;
        }
        i++; // skip the separator that follows the '#' run
        const content = capture_until("\n");
        tokens.push({ type: "header", level: level, content: content });
        newline_count = 2;
    };

    // ![alt](url "title"); malformed input is emitted as literal text.
    const lex_image = () => {
        if (markdown[i + 1] !== "[") {
            append_literal(markdown[i]);
            return;
        }
        finish();
        i += 2;
        const alt_text = capture_until("]");
        if (i >= markdown.length) {
            // No closing bracket at all: do not invent one.
            append_literal(`![${alt_text}`);
            return;
        }
        i++;
        if (!match("(")) {
            current_string = `![${alt_text}]`;
            // The following character is taken literally, when there is one.
            append_literal(i < markdown.length ? markdown[i] : "");
            return;
        }
        const url = capture_until_predicate((k) => markdown[k] === "\"" || markdown[k] === ")");
        let title = "";
        let consumed = `![${alt_text}](${url}`;
        if (match("\"")) {
            title = capture_until("\"");
            consumed += `"${title}`;
            if (i < markdown.length) consumed += "\"";
            i++;
        }
        if (!match(")")) {
            // The original fallback text for this branch is not recoverable from the
            // file; the consumed source is re-emitted as plain text and the current
            // character is handed back to the main loop.
            append_literal(consumed);
            i--;
            return;
        }
        tokens.push({ type: "image", alt: alt_text, url: url, title: title });
        rewind_unless_newline();
    };

    // Line-leading '-' / '>' : horizontal rule, bullet list or quote.
    const lex_block = () => {
        const marker = markdown[i];
        if (!at_line_start) {
            append_literal(marker);
            return;
        }
        finish();
        const rule_ends = peek(3) === "\n" || i + 3 >= markdown.length;
        if (markdown.startsWith("---", i) && rule_ends) {
            i += 3;
            tokens.push({ type: "hline" });
            return;
        }
        const items = [];
        i++;
        do {
            match(" ");
            const item_text = capture_until("\n");
            i++; // step over the item's line break
            items.push(md2tokens(item_text, false));
        } while (match(marker));
        tokens.push({ type: marker === "-" ? "ul" : "quote", items: items });
        rewind_unless_newline();
    };

    const lex_newline = () => {
        const after_two_spaces = peek(-1) === " " && peek(-2) === " ";
        if (!after_two_spaces && peek(-1) !== "\n") {
            // Soft wrap: the line break becomes a single space inside the text.
            current_string += " ";
            tokens.push({ type: "span", content: current_string });
            current_string = "";
            at_line_start = true;
            return;
        }
        if (newline_count > 2) return;
        at_line_start = true;
        newline_count++;
        finish();
        tokens.push({ type: "newline" });
        if (newline_count === 1 && peek(-1) === "\n") {
            tokens.push({ type: "newline" });
        }
    };

    // [text](link); a bracket pair without "(" stays literal text.
    const lex_link = () => {
        finish();
        i++;
        mark_inline();
        const text = capture_until("]");
        if (i >= markdown.length) {
            // No closing bracket at all: do not invent one.
            current_string = `[${text}`;
            return;
        }
        i++;
        if (!match("(")) {
            current_string = `[${text}]`;
            // The following character is taken literally, when there is one.
            if (i < markdown.length) current_string += markdown[i];
            return;
        }
        const link = capture_until(")");
        tokens.push({ type: "link", link: link, text: text });
    };

    // `inline` code or a ```lang fenced block.
    const lex_code = () => {
        finish();
        i++;
        mark_inline();
        const is_backtick = (k) => markdown[k] === "`";
        let type = "inlinecode";
        let language = "";
        let stop = is_backtick;
        if (is_backtick(i) && is_backtick(i + 1)) {
            i += 2;
            language = capture_until("\n");
            stop = (k) => is_backtick(k) && is_backtick(k + 1) && is_backtick(k + 2);
            type = "codeblock";
        }
        // For a fenced block the content starts at the line break after the language.
        const content = capture_until_predicate(stop);
        tokens.push({ type: type, content: content, language: language });
        if (type === "codeblock") {
            i += 2; // land on the last closing backtick
            newline_count = 2;
        }
    };

    while (i < markdown.length) {
        switch (markdown[i]) {
            case "\\":
                lex_escape();
                break;
            case "*":
                lex_emphasis();
                break;
            case "#":
                lex_header();
                break;
            case "!":
                lex_image();
                break;
            case ">":
            case "-":
                lex_block();
                break;
            case "\n":
                lex_newline();
                break;
            case "[":
                lex_link();
                break;
            case "`":
                lex_code();
                break;
            default:
                append_literal(markdown[i]);
        }
        i++;
    }
    finish();
    return tokens;
};

/**
 * Highlights `code` with the rule set registered for `language`.
 *
 * @param {string} language - Key looked up in `maps`.
 * @param {string} code - Source text of the code block.
 * @returns {*} `code` unchanged when `maps` has no entry for `language`, otherwise
 *     whatever `highlight` returns.
 */
const highlight_code = (language, code) => {
    // `maps` and `highlight` are not defined in this part of the file.
    const map = maps[language];
    if (map == null) return code;
    return highlight(map, code);
};

// NOTE(review): `tokens2html` starts on this source line, but from here on its text is
// damaged in the file (the markup inside its strings is missing and most of its cases
// are gone), so it is not reconstructed. Its surviving opening is kept verbatim:
//
// const tokens2html = (tokens, title) => { let output = ""; for (let token of tokens) { switch (token.type) { case "toc": let headers = tokens.filter(x => x.type == "header"); let listHTML = ''; let currentLevel = 0; headers.forEach(header => { let indent = ' '.repeat(header.level - 1); // Indentation based on header level // Check if we need to create a new
${token.title}
\n"; for (let item of token.items) { output += `${tokens2html(item)}\n`; } output += ""; break; case "inlinecode": output += `
${token.content}
`;
break;
case "codeblock":
output += `${highlight_code(token.language, token.content)}` break; case "link": output += `${token.text}`; break; default: throw "Unknown token type " + token.type; } } return output; }