render.py: fix PDF kerning rendering bug (#6366)

2021-08-26 13:17:50 +08:00 · 2021-08-26 13:17:50 +08:00 · b0cb85ae98
parent 19e79cd10b
commit b0cb85ae98
1 changed file with 29 additions and 81 deletions
--- a/scripts/pdf/render.py
+++ b/scripts/pdf/render.py
@@ -15,106 +15,54 @@ import markdown
 import argparse
 from datetime import datetime
-from weasyprint import HTML
+from weasyprint import HTML, CSS
 def main(loc, colorscheme):
    oslist = []
    allmd = []
    group = []
    ap = []
    # Checking correctness of path
    if not os.path.isdir(loc):
        print("Invalid directory. Please try again!", file=sys.stderr)
        sys.exit(1)
-    # Writing names of all directories inside 'pages' to a list
+    # Set up css style sheets
-    for os_dir in os.listdir(loc):
+    csslist = ["basic.css"]
        oslist.append(os_dir)
    oslist.sort()
    # Required strings to create intermediate HTML files
    header = '<!doctype html><html><head><meta charset="utf-8"><link rel="stylesheet" href="basic.css">'
    if colorscheme != "basic":
-        header += '<link rel="stylesheet" href="' + colorscheme + '.css"></head><body>\n'
+        csslist.append(colorscheme + ".css")
-    header += "</head><body>\n"
+    # A string that stores all pages in HTML format
-    footer = "</body></html>"
+    html = '<!doctype html><html><head><meta charset="utf-8"></head>' \
-    title_content = "<h1 class=title-main>tldr pages</h1>" \
+        +"<body><h1 class=title-main>tldr pages</h1>" \
        + "<h4 class=title-sub>Simplified and community-driven man pages</h4>" \
        + "<h6 class=title-sub><em><small>Generated on " + datetime.now().strftime("%c") + "</small></em></h6>" \
-        + "</body></html>"
+        + '<p style="page-break-before: always" ></p>'
    # Creating title page
    with open("title.html", "w") as f:
        f.write(header + title_content)
    group.append(HTML("title.html").render())
    for operating_sys in oslist:
    # Writing names of all directories inside 'pages' to a list
    for operating_sys in sorted(os.listdir(loc)):
        # Required string to create directory title pages
-        dir_title = "<h2 class=title-dir>" + \
+        html += "<h2 class=title-dir>" + operating_sys.capitalize() + "</h2>" \
-            operating_sys.capitalize() + "</h2></body></html>"
+            + '<p style="page-break-before: always" ></p>'
-        # Creating directory title page for current directory
+        # Conversion of Markdown to HTML string
-        with open("dir_title.html", "w") as os_html:
+        for page_number, md in enumerate(sorted(glob.glob(os.path.join(loc, operating_sys, "*.md"))), start=1):
-            os_html.write(header + dir_title)
+            with open(md, "r") as inp:
-
+                text = inp.readlines()
-        group.append(HTML("dir_title.html").render())
+                for line in text:
-
+                    if re.match(r'^>', line):
-        # Creating a list of all md files in the current directory
+                        line = line[:0] + '####' + line[1:]
-        for temp in glob.glob(os.path.join(loc, operating_sys, "*.md")):
+                    html += markdown.markdown(line)
-            allmd.append(temp)
+            html += '<p style="page-break-before: always" ></p>'
-
+            print(f"Rendered page {page_number} of the directory {operating_sys}")
-        # Sorting all filenames in the directory, to maintain the order of the PDF
+    
-        allmd.sort()
+    html += "</body></html>"
-
+    
-        # Conversion of Markdown to HTML
+    # Writing the PDF to disk
-        for page_number, md in enumerate(allmd, start=1):
+    print("\nConverting all pages to PDF...")
-
+    HTML(string=html).write_pdf("tldr-pages.pdf", stylesheets=csslist)
                with open(md, "r") as inp:
                    text = inp.readlines()
                with open("htmlout.html", "w") as out:
                    out.write(header)
                    for line in text:
                        if re.match(r'^>', line):
                            line = line[:0] + '####' + line[1:]
                        html = markdown.markdown(line)
                        out.write(html)
                    out.write(footer)
                group.append(HTML("htmlout.html").render())
                print("Rendered page {} of the directory {}".format(
                    str(page_number), operating_sys))
        allmd.clear()
    # Merging all the documents into a single PDF
    for doc in group:
        for p in doc.pages:
            ap.append(p)
    # Writing the PDF to disk, preserving metadata of first `tldr` page
    group[2].copy(ap).write_pdf('tldr-pages.pdf')
    if os.path.exists("tldr-pages.pdf"):
        print("\nCreated tldr-pages.pdf in the current directory!\n")
    # Removing unnecessary intermediate files
    try:
        os.remove("htmlout.html")
        os.remove("title.html")
        os.remove("dir_title.html")
    except OSError:
        print("Error removing temporary file(s)")
 if __name__ == "__main__":
    # Parsing the arguments