From b0cb85ae9821e61b9ea7cf61ee272aff2434bd7b Mon Sep 17 00:00:00 2001
From: lincc <46962923+blueskyson@users.noreply.github.com>
Date: Thu, 26 Aug 2021 13:17:50 +0800
Subject: [PATCH] render.py: fix PDF kerning rendering bug (#6366)

---
 scripts/pdf/render.py | 110 +++++++++++-------------------------------
 1 file changed, 29 insertions(+), 81 deletions(-)
diff --git a/scripts/pdf/render.py b/scripts/pdf/render.py
index 102dacaeb..5985fd393 100644
--- a/scripts/pdf/render.py
+++ b/scripts/pdf/render.py
@@ -15,106 +15,54 @@ import markdown
 import argparse
 from datetime import datetime
 
-from weasyprint import HTML
-
+from weasyprint import HTML, CSS
 
 def main(loc, colorscheme):
 
-    oslist = []
-    allmd = []
-    group = []
-    ap = []
-
     # Checking correctness of path
     if not os.path.isdir(loc):
         print("Invalid directory. Please try again!", file=sys.stderr)
         sys.exit(1)
 
-    # Writing names of all directories inside 'pages' to a list
-    for os_dir in os.listdir(loc):
-        oslist.append(os_dir)
-
-    oslist.sort()
-
-    # Required strings to create intermediate HTML files
-    header = '<!doctype html><html><head><meta charset="utf-8"><link rel="stylesheet" href="basic.css">'
+    # Set up CSS style sheets
+    csslist = ["basic.css"]
     if colorscheme != "basic":
-        header += '<link rel="stylesheet" href="' + colorscheme + '.css"></head><body>\n'
+        csslist.append(colorscheme + ".css")
 
-    header += "</head><body>\n"
-    footer = "</body></html>"
-    title_content = "<h1 class=title-main>tldr pages</h1>" \
+    # A string that stores all pages in HTML format
+    html = '<!doctype html><html><head><meta charset="utf-8"></head>' \
+        +"<body><h1 class=title-main>tldr pages</h1>" \
         + "<h4 class=title-sub>Simplified and community-driven man pages</h4>" \
         + "<h6 class=title-sub><em><small>Generated on " + datetime.now().strftime("%c") + "</small></em></h6>" \
-        + "</body></html>"
-
-    # Creating title page
-    with open("title.html", "w") as f:
-        f.write(header + title_content)
-
-    group.append(HTML("title.html").render())
-
-    for operating_sys in oslist:
+        + '<p style="page-break-before: always" ></p>'
 
+    # Iterating over all directories inside 'pages' in sorted order
+    for operating_sys in sorted(os.listdir(loc)):
+        
         # Required string to create directory title pages
-        dir_title = "<h2 class=title-dir>" + \
-            operating_sys.capitalize() + "</h2></body></html>"
+        html += "<h2 class=title-dir>" + operating_sys.capitalize() + "</h2>" \
+            + '<p style="page-break-before: always" ></p>'
 
-        # Creating directory title page for current directory
-        with open("dir_title.html", "w") as os_html:
-            os_html.write(header + dir_title)
-
-        group.append(HTML("dir_title.html").render())
-
-        # Creating a list of all md files in the current directory
-        for temp in glob.glob(os.path.join(loc, operating_sys, "*.md")):
-            allmd.append(temp)
-
-        # Sorting all filenames in the directory, to maintain the order of the PDF
-        allmd.sort()
-
-        # Conversion of Markdown to HTML
-        for page_number, md in enumerate(allmd, start=1):
-
-                with open(md, "r") as inp:
-                    text = inp.readlines()
-
-                with open("htmlout.html", "w") as out:
-                    out.write(header)
-
-                    for line in text:
-                        if re.match(r'^>', line):
-                            line = line[:0] + '####' + line[1:]
-                        html = markdown.markdown(line)
-                        out.write(html)
-                    out.write(footer)
-
-                group.append(HTML("htmlout.html").render())
-                print("Rendered page {} of the directory {}".format(
-                    str(page_number), operating_sys))
-
-        allmd.clear()
-
-    # Merging all the documents into a single PDF
-    for doc in group:
-        for p in doc.pages:
-            ap.append(p)
-
-    # Writing the PDF to disk, preserving metadata of first `tldr` page
-    group[2].copy(ap).write_pdf('tldr-pages.pdf')
+        # Conversion of Markdown to HTML string
+        for page_number, md in enumerate(sorted(glob.glob(os.path.join(loc, operating_sys, "*.md"))), start=1):
+            with open(md, "r") as inp:
+                text = inp.readlines()
+                for line in text:
+                    if re.match(r'^>', line):
+                        line = line[:0] + '####' + line[1:]
+                    html += markdown.markdown(line)
+            html += '<p style="page-break-before: always" ></p>'
+            print(f"Rendered page {page_number} of the directory {operating_sys}")
+    
+    html += "</body></html>"
+    
+    # Writing the PDF to disk
+    print("\nConverting all pages to PDF...")
+    HTML(string=html).write_pdf("tldr-pages.pdf", stylesheets=csslist)
 
     if os.path.exists("tldr-pages.pdf"):
         print("\nCreated tldr-pages.pdf in the current directory!\n")
 
-    # Removing unnecessary intermediate files
-    try:
-        os.remove("htmlout.html")
-        os.remove("title.html")
-        os.remove("dir_title.html")
-    except OSError:
-        print("Error removing temporary file(s)")
-
-
 if __name__ == "__main__":
 
     # Parsing the arguments