render.py: fix PDF kerning rendering bug (#6366)

beep
lincc 2021-08-26 13:17:50 +08:00 committed by GitHub
parent 19e79cd10b
commit b0cb85ae98
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 29 additions and 81 deletions

View File

@@ -15,106 +15,54 @@ import markdown
import argparse import argparse
from datetime import datetime from datetime import datetime
from weasyprint import HTML from weasyprint import HTML, CSS
def main(loc, colorscheme): def main(loc, colorscheme):
oslist = []
allmd = []
group = []
ap = []
# Checking correctness of path # Checking correctness of path
if not os.path.isdir(loc): if not os.path.isdir(loc):
print("Invalid directory. Please try again!", file=sys.stderr) print("Invalid directory. Please try again!", file=sys.stderr)
sys.exit(1) sys.exit(1)
# Writing names of all directories inside 'pages' to a list # Set up css style sheets
for os_dir in os.listdir(loc): csslist = ["basic.css"]
oslist.append(os_dir)
oslist.sort()
# Required strings to create intermediate HTML files
header = '<!doctype html><html><head><meta charset="utf-8"><link rel="stylesheet" href="basic.css">'
if colorscheme != "basic": if colorscheme != "basic":
header += '<link rel="stylesheet" href="' + colorscheme + '.css"></head><body>\n' csslist.append(colorscheme + ".css")
header += "</head><body>\n" # A string that stores all pages in HTML format
footer = "</body></html>" html = '<!doctype html><html><head><meta charset="utf-8"></head>' \
title_content = "<h1 class=title-main>tldr pages</h1>" \ +"<body><h1 class=title-main>tldr pages</h1>" \
+ "<h4 class=title-sub>Simplified and community-driven man pages</h4>" \ + "<h4 class=title-sub>Simplified and community-driven man pages</h4>" \
+ "<h6 class=title-sub><em><small>Generated on " + datetime.now().strftime("%c") + "</small></em></h6>" \ + "<h6 class=title-sub><em><small>Generated on " + datetime.now().strftime("%c") + "</small></em></h6>" \
+ "</body></html>" + '<p style="page-break-before: always" ></p>'
# Creating title page
with open("title.html", "w") as f:
f.write(header + title_content)
group.append(HTML("title.html").render())
for operating_sys in oslist:
# Writing names of all directories inside 'pages' to a list
for operating_sys in sorted(os.listdir(loc)):
# Required string to create directory title pages # Required string to create directory title pages
dir_title = "<h2 class=title-dir>" + \ html += "<h2 class=title-dir>" + operating_sys.capitalize() + "</h2>" \
operating_sys.capitalize() + "</h2></body></html>" + '<p style="page-break-before: always" ></p>'
# Creating directory title page for current directory # Conversion of Markdown to HTML string
with open("dir_title.html", "w") as os_html: for page_number, md in enumerate(sorted(glob.glob(os.path.join(loc, operating_sys, "*.md"))), start=1):
os_html.write(header + dir_title) with open(md, "r") as inp:
text = inp.readlines()
group.append(HTML("dir_title.html").render()) for line in text:
if re.match(r'^>', line):
# Creating a list of all md files in the current directory line = line[:0] + '####' + line[1:]
for temp in glob.glob(os.path.join(loc, operating_sys, "*.md")): html += markdown.markdown(line)
allmd.append(temp) html += '<p style="page-break-before: always" ></p>'
print(f"Rendered page {page_number} of the directory {operating_sys}")
# Sorting all filenames in the directory, to maintain the order of the PDF
allmd.sort() html += "</body></html>"
# Conversion of Markdown to HTML # Writing the PDF to disk
for page_number, md in enumerate(allmd, start=1): print("\nConverting all pages to PDF...")
HTML(string=html).write_pdf("tldr-pages.pdf", stylesheets=csslist)
with open(md, "r") as inp:
text = inp.readlines()
with open("htmlout.html", "w") as out:
out.write(header)
for line in text:
if re.match(r'^>', line):
line = line[:0] + '####' + line[1:]
html = markdown.markdown(line)
out.write(html)
out.write(footer)
group.append(HTML("htmlout.html").render())
print("Rendered page {} of the directory {}".format(
str(page_number), operating_sys))
allmd.clear()
# Merging all the documents into a single PDF
for doc in group:
for p in doc.pages:
ap.append(p)
# Writing the PDF to disk, preserving metadata of first `tldr` page
group[2].copy(ap).write_pdf('tldr-pages.pdf')
if os.path.exists("tldr-pages.pdf"): if os.path.exists("tldr-pages.pdf"):
print("\nCreated tldr-pages.pdf in the current directory!\n") print("\nCreated tldr-pages.pdf in the current directory!\n")
# Removing unnecessary intermediate files
try:
os.remove("htmlout.html")
os.remove("title.html")
os.remove("dir_title.html")
except OSError:
print("Error removing temporary file(s)")
if __name__ == "__main__": if __name__ == "__main__":
# Parsing the arguments # Parsing the arguments