render.py: fix PDF kerning rendering bug (#6366)

beep
lincc 2021-08-26 13:17:50 +08:00 committed by GitHub
parent 19e79cd10b
commit b0cb85ae98
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 29 additions and 81 deletions

View File

@ -15,106 +15,54 @@ import markdown
import argparse
from datetime import datetime
from weasyprint import HTML
from weasyprint import HTML, CSS
def main(loc, colorscheme):
    """Render every Markdown page under *loc* into a single ``tldr-pages.pdf``.

    All pages are converted to HTML and collected into one document, which is
    then rendered in a single pass. The PDF is written to the current working
    directory.

    Args:
        loc: Path to the directory whose sub-directories (one per platform)
            contain the ``*.md`` pages.
        colorscheme: Name of the color scheme. ``"basic"`` uses only
            ``basic.css``; any other value additionally applies
            ``<colorscheme>.css``.

    Raises:
        SystemExit: With exit code 1 when *loc* is not an existing directory.
    """
    # Checking correctness of path
    if not os.path.isdir(loc):
        print("Invalid directory. Please try again!", file=sys.stderr)
        sys.exit(1)

    # Set up css style sheets
    csslist = ["basic.css"]
    if colorscheme != "basic":
        csslist.append(colorscheme + ".css")

    # An empty paragraph that forces the content following it onto a new page
    page_break = '<p style="page-break-before: always" ></p>'

    # Fragments of the final HTML document; joined once at the end instead of
    # growing one large string with repeated concatenation
    parts = [
        '<!doctype html><html><head><meta charset="utf-8"></head>',
        "<body><h1 class=title-main>tldr pages</h1>",
        "<h4 class=title-sub>Simplified and community-driven man pages</h4>",
        "<h6 class=title-sub><em><small>Generated on "
        + datetime.now().strftime("%c")
        + "</small></em></h6>",
        page_break,
    ]

    # Walking the directories inside 'pages' in sorted order
    for operating_sys in sorted(os.listdir(loc)):
        # Directory title page
        parts.append("<h2 class=title-dir>" + operating_sys.capitalize() + "</h2>")
        parts.append(page_break)

        # Sorting all filenames in the directory, to maintain the order of the PDF
        md_files = sorted(glob.glob(os.path.join(loc, operating_sys, "*.md")))

        # Conversion of Markdown to HTML string
        for page_number, md in enumerate(md_files, start=1):
            # Read explicitly as UTF-8 so the result does not depend on the
            # platform's default encoding
            with open(md, "r", encoding="utf-8") as inp:
                for line in inp:
                    # A leading '>' (blockquote marker) is replaced by '####'
                    # so the line is rendered as a level-4 heading instead
                    if line.startswith(">"):
                        line = "####" + line[1:]
                    # Each line is converted on its own, as a separate
                    # Markdown document
                    parts.append(markdown.markdown(line))
            parts.append(page_break)
            print(f"Rendered page {page_number} of the directory {operating_sys}")

    parts.append("</body></html>")
    html = "".join(parts)

    # Writing the PDF to disk
    print("\nConverting all pages to PDF...")
    HTML(string=html).write_pdf("tldr-pages.pdf", stylesheets=csslist)

    if os.path.exists("tldr-pages.pdf"):
        print("\nCreated tldr-pages.pdf in the current directory!\n")
if __name__ == "__main__":
# Parsing the arguments