par(md): add softbreak

par(md): skip empty line...
... instead of clearing new line character
2022-06-12 01:28:56 +03:00 · 2022-06-12 00:58:24 +03:00
2 changed files with 56 additions and 30 deletions
--- a/par/md.test.ts
+++ b/par/md.test.ts
@ -7,12 +7,13 @@ const ren = new HtmlStrRenderer();
 // Misc

 Deno.test({
-  name: "should skip new line character",
+  name: "should skip empty line",
  fn: () => {
    const par = new MarkdownParser();
    assertEquals(ren.render(par.parse("\n")), "");
    assertEquals(ren.render(par.parse("\r\n")), "");
    assertEquals(ren.render(par.parse("\n\r\n")), "");
+    assertEquals(ren.render(par.parse("\n          \n")), "");
  },
 });

@ -56,8 +57,6 @@ Deno.test({
    assertEquals(ren.render(par.parse(" # hello")), "<h1>hello</h1>");
    assertEquals(ren.render(par.parse("  # hello")), "<h1>hello</h1>");
    assertEquals(ren.render(par.parse("   # hello")), "<h1>hello</h1>");
-    assertEquals(ren.render(par.parse("\n   # hello")), "<h1>hello</h1>");
-    assertEquals(ren.render(par.parse("\r\n   # hello")), "<h1>hello</h1>");
  },
 });

@ -101,3 +100,16 @@ Deno.test({
    assertEquals(ren.render(par.parse("hello")), "<p>hello</p>");
  },
 });
+
+Deno.test({
+  name: "should parse paragraph with softbreak",
+  fn: () => {
+    // A lone line break inside a paragraph must render as a single space.
+    const parser = new MarkdownParser();
+    const source = "hello\nworld";
+
+    const html = ren.render(parser.parse(source));
+
+    assertEquals(html, "<p>hello world</p>");
+  },
+});
--- a/par/md.ts
+++ b/par/md.ts
@ -2,7 +2,7 @@ import { AnyNode, Elem, Fragment, TextNode } from "../core/node.ts";
 import { isNil } from "../core/utils.ts";
 import { Parser } from "./types.ts";

-const RE_NEW_LINE = /^\r?\n/;
+const RE_EMPTY_LINE = /^\s*$/;

 const RE_OPEN_ATX_HEADING = /^\s{0,3}(#{1,6})(\s|$)/;
 const RE_CLOSE_ATX_HEADING = /(^|\s+)#*\s*$/;
@ -12,17 +12,9 @@ export class MarkdownParser implements Parser {
    const astDoc: AstDocument = { kind: AstKind.Document, content: [] };

    let readStr = input;
-
    while (readStr.length) {
-      // 1. clear new line character
-      const match = RE_NEW_LINE.exec(readStr);
-      if (!isNil(match)) {
-        readStr = readStr.slice(match[0].length);
-        continue;
-      }
-
-      // 2. try to find atx heading sequence
-      const newReadStr = parseAtxHeading(astDoc, readStr) ??
+      const newReadStr = skipEmptyLine(readStr) ??
+        parseAtxHeading(astDoc, readStr) ??
        parseParagraph(astDoc, readStr);
      if (isNil(newReadStr)) break;
      readStr = newReadStr;
@ -42,11 +34,15 @@ function DocChild(content: AstDocumentChild): Elem {
 }

 function Heading(ast: AstAtxHeading): Elem {
-  return new Elem(`h${ast.level}`, {}, ast.content.map(Text));
+  return new Elem(`h${ast.level}`, {}, ast.content.map(InlineContent));
 }

 function Paragraph(ast: AstParagraph): Elem {
-  return new Elem("p", {}, ast.content.map(Text));
+  return new Elem("p", {}, ast.content.map(InlineContent));
+}
+
+function InlineContent(ast: AstInlineContent): TextNode {
+  return Text(ast);
 }

 function Text(ast: AstText): TextNode {
@ -55,6 +51,12 @@ function Text(ast: AstText): TextNode {

 // parse utils

+/** Drops one leading blank line (or a whitespace-only tail); null if none. */
+function skipEmptyLine(readStr: string): string | null {
+  const match = /^[^\S\n]*(?:\n|$)/.exec(readStr); // [^\S\n]: whitespace but not "\n"
+  return isNil(match) ? null : readStr.slice(match[0].length);
+}
+
 function parseAtxHeading(ast: AstDocument, readStr: string): string | null {
  const match = RE_OPEN_ATX_HEADING.exec(readStr);
  if (isNil(match)) return null;
@ -94,13 +96,22 @@ function parseParagraph(ast: AstDocument, readStr: string): string | null {
  };
  ast.content.push(paragraph);

-  const paragraphInlineContent = readStr.includes("\n")
-    ? readStr.slice(0, readStr.indexOf("\n") + 1)
-    : readStr;
+  // Gather consecutive lines until only whitespace is left in the input.
+  let paragraphInlineContent = "";
+  while (!RE_EMPTY_LINE.test(readStr)) {
+    const lineEnd = readStr.indexOf("\n");
+    const line = lineEnd === -1 ? readStr : readStr.slice(0, lineEnd + 1);
+    paragraphInlineContent += line;
+    // Advance by the line just taken, not by the accumulated length.
+    readStr = readStr.slice(line.length);
+  }

-  parseInlineContent(paragraph, paragraphInlineContent);
+  // A whitespace-only input yields no inline content to parse.
+  if (paragraphInlineContent.length) {
+    parseInlineContent(paragraph, paragraphInlineContent);
+  }

-  return readStr.slice(paragraphInlineContent.length);
+  return readStr;
 }

 function parseInlineContent(
@ -109,11 +120,11 @@ function parseInlineContent(
 ): string | null {
  if (!readStr.length) return null;

-  const text: AstText = {
-    kind: AstKind.Text,
-    content: readStr.trim(),
-  };
-  ast.content.push(text);
+  const parts = readStr.split("\n").filter(Boolean).map(
+    (textPart): AstText => ({ kind: AstKind.Text, content: textPart }),
+  );
+
+  ast.content = parts;

  return readStr;
 }
@ -123,16 +134,19 @@ function parseInlineContent(
 type AstDocument = BaseAstItem<AstKind.Document, AstDocumentChild[]>;
 type AstDocumentChild = AstAtxHeading | AstParagraph;

-interface AstAtxHeading extends BaseAstItem<AstKind.AtxHeading, AstText[]> {
+interface AstAtxHeading
+  extends BaseAstItem<AstKind.AtxHeading, AstInlineContent[]> {
  level: HeadingLevel;
 }

-type AstParagraph = BaseAstItem<AstKind.Paragraph, AstText[]>;
+type HeadingLevel = 1 | 2 | 3 | 4 | 5 | 6;
+
+type AstParagraph = BaseAstItem<AstKind.Paragraph, AstInlineContent[]>;
+
+type AstInlineContent = AstText;

 type AstText = BaseAstItem<AstKind.Text, string>;

-type HeadingLevel = 1 | 2 | 3 | 4 | 5 | 6;
-
 interface BaseAstItem<K extends AstKind, Cont> {
  kind: K;
  content: Cont;
Author	SHA1	Message	Date
Dmitriy Pleshevskiy	53c4f4039c	par(md): add softbreak	2022-06-12 01:28:56 +03:00
Dmitriy Pleshevskiy	9551f3ae1e	par(md): skip empty line... ... instead of clearing new line character	2022-06-12 00:58:24 +03:00