Apply black

This commit is contained in:
Adrian Vollmer 2024-03-31 10:56:04 +02:00
parent c58206b892
commit 0bde114839
4 changed files with 140 additions and 120 deletions

View File

@ -1,8 +1,10 @@
def main(): def main():
from .args import parse_args from .args import parse_args
args = parse_args() args = parse_args()
from .embed import embed_assets, extract_assets from .embed import embed_assets, extract_assets
if args.revert: if args.revert:
extract_assets( extract_assets(
args.input_path, args.input_path,

View File

@ -10,7 +10,9 @@ except ImportError:
try: try:
__version__ = importlib_metadata.version(__package__ or __name__) __version__ = importlib_metadata.version(__package__ or __name__)
# __summary__ = importlib_metadata.metadata(__package__ or __name__)['summary'] # __summary__ = importlib_metadata.metadata(__package__ or __name__)['summary']
__summary__ = "Bundle assets of distributed HTML docs into one self-contained HTML file" __summary__ = (
"Bundle assets of distributed HTML docs into one self-contained HTML file"
)
except importlib_metadata.PackageNotFoundError: except importlib_metadata.PackageNotFoundError:
__version__ = "??" __version__ = "??"
__summary__ = "??" __summary__ = "??"
@ -21,40 +23,46 @@ parser = argparse.ArgumentParser(
) )
parser.add_argument( parser.add_argument(
'-v', '--version', action='version', "-v",
"--version",
action="version",
version=__version__, version=__version__,
) )
parser.add_argument( parser.add_argument(
'-R', '--revert', "-R",
"--revert",
default=False, default=False,
action='store_true', action="store_true",
help='set this flag for the reverse operation: deflate a Zundler file' help="set this flag for the reverse operation: deflate a Zundler file"
' into its components (ouput_path must be a directory; "." by default)', ' into its components (ouput_path must be a directory; "." by default)',
) )
parser.add_argument( parser.add_argument(
'-P', '--append-pre', "-P",
"--append-pre",
default="", default="",
help='append JS code to inject_pre script (default: %(default)s)', help="append JS code to inject_pre script (default: %(default)s)",
) )
parser.add_argument( parser.add_argument(
'-p', '--append-post', "-p",
"--append-post",
default="", default="",
help='append JS code to inject_post script (default: %(default)s)', help="append JS code to inject_post script (default: %(default)s)",
) )
parser.add_argument( parser.add_argument(
'input_path', "input_path",
help='input path to the root HTML file', help="input path to the root HTML file",
) )
parser.add_argument( parser.add_argument(
'-o', '--output-path', "-o",
help='output path to resulting HTML file', "--output-path",
help="output path to resulting HTML file",
) )

View File

@ -40,33 +40,32 @@ logger = logging.getLogger(__name__)
def embed_assets(index_file, output_path=None, append_pre="", append_post=""): def embed_assets(index_file, output_path=None, append_pre="", append_post=""):
init_files = {} init_files = {}
for filename in [ for filename in [
'init.css', "init.css",
'init.html', "init.html",
'bootstrap.js', "bootstrap.js",
'main.js', "main.js",
'inject_pre.js', "inject_pre.js",
'inject_post.js', "inject_post.js",
'pako.min.js', "pako.min.js",
'LICENSE', "LICENSE",
]: ]:
path = os.path.join(SCRIPT_PATH, 'assets', filename) path = os.path.join(SCRIPT_PATH, "assets", filename)
init_files[filename] = open(path, 'r').read() init_files[filename] = open(path, "r").read()
if not os.path.exists(index_file): if not os.path.exists(index_file):
raise FileNotFoundError('no such file: %s' % index_file) raise FileNotFoundError("no such file: %s" % index_file)
base_dir = os.path.dirname(index_file) base_dir = os.path.dirname(index_file)
base_name = os.path.basename(index_file) base_name = os.path.basename(index_file)
new_base_name = 'SELF_CONTAINED_' + base_name new_base_name = "SELF_CONTAINED_" + base_name
if not output_path: if not output_path:
output_path = os.path.join(base_dir, new_base_name) output_path = os.path.join(base_dir, new_base_name)
before = init_files['inject_pre.js'] + append_pre before = init_files["inject_pre.js"] + append_pre
after = init_files['inject_post.js'] + append_post after = init_files["inject_post.js"] + append_post
file_tree = load_filetree( file_tree = load_filetree(
base_dir, base_dir,
before=before, before=before,
@ -77,16 +76,16 @@ def embed_assets(index_file, output_path=None, append_pre="", append_post=""):
remote_resources = [] remote_resources = []
global_context = { global_context = {
'current_path': base_name, "current_path": base_name,
'file_tree': file_tree, "file_tree": file_tree,
'remote_resources': remote_resources, "remote_resources": remote_resources,
'main': init_files['main.js'], "main": init_files["main.js"],
} }
global_context = json.dumps(global_context) global_context = json.dumps(global_context)
logger.debug('total asset size: %d' % len(global_context)) logger.debug("total asset size: %d" % len(global_context))
global_context = deflate(global_context) global_context = deflate(global_context)
logger.debug('total asset size (compressed): %d' % len(global_context)) logger.debug("total asset size (compressed): %d" % len(global_context))
result = """ result = """
<!DOCTYPE html> <!DOCTYPE html>
@ -98,18 +97,18 @@ def embed_assets(index_file, output_path=None, append_pre="", append_post=""):
<script>{bootstrap} //# sourceURL=boostrap.js</script> <script>{bootstrap} //# sourceURL=boostrap.js</script>
</body><!-- {license} --></html> </body><!-- {license} --></html>
""".format( """.format(
style=init_files['init.css'], style=init_files["init.css"],
body=init_files['init.html'], body=init_files["init.html"],
pako=init_files['pako.min.js'], pako=init_files["pako.min.js"],
bootstrap=init_files['bootstrap.js'], bootstrap=init_files["bootstrap.js"],
global_context=global_context, global_context=global_context,
license=init_files['LICENSE'], license=init_files["LICENSE"],
) )
with open(output_path, 'w') as fp: with open(output_path, "w") as fp:
fp.write(result) fp.write(result)
logger.info('Result written to: %s' % output_path) logger.info("Result written to: %s" % output_path)
return output_path return output_path
@ -128,22 +127,30 @@ def prepare_file(filename, before, after):
""" """
_, ext = os.path.splitext(filename) _, ext = os.path.splitext(filename)
ext = ext.lower()[1:] ext = ext.lower()[1:]
data = open(filename, 'rb').read() data = open(filename, "rb").read()
mime_type = mime_type_from_bytes(filename, data) mime_type = mime_type_from_bytes(filename, data)
base64encoded = False base64encoded = False
if ext == 'css': if ext == "css":
# assuming all CSS files have names ending in '.css' # assuming all CSS files have names ending in '.css'
data = embed_css_resources(data, filename) data = embed_css_resources(data, filename)
elif ext in [ elif ext in [
'png', 'jpg', 'jpeg', 'woff', 'woff2', 'eot', 'ttf', 'gif', 'ico', "png",
"jpg",
"jpeg",
"woff",
"woff2",
"eot",
"ttf",
"gif",
"ico",
]: ]:
# JSON doesn't allow binary data # JSON doesn't allow binary data
data = base64.b64encode(data) data = base64.b64encode(data)
base64encoded = True base64encoded = True
elif ext in ['html', 'htm']: elif ext in ["html", "htm"]:
data = embed_html_resources( data = embed_html_resources(
data, data,
os.path.dirname(filename), os.path.dirname(filename),
@ -157,12 +164,12 @@ def prepare_file(filename, before, after):
except UnicodeError: except UnicodeError:
data = base64.b64encode(data).decode() data = base64.b64encode(data).decode()
logger.debug('loaded file: %s [%s, %d bytes]' % (filename, mime_type, len(data))) logger.debug("loaded file: %s [%s, %d bytes]" % (filename, mime_type, len(data)))
result = { result = {
'data': data, "data": data,
'mime_type': mime_type, "mime_type": mime_type,
'base64encoded': base64encoded, "base64encoded": base64encoded,
} }
return result return result
@ -179,18 +186,19 @@ def embed_html_resources(html, base_dir, before, after):
# This cannot be done in JavaScript, it would be too late # This cannot be done in JavaScript, it would be too late
import bs4 import bs4
soup = bs4.BeautifulSoup(html, 'lxml')
body = soup.find('body') soup = bs4.BeautifulSoup(html, "lxml")
head = soup.find('head') body = soup.find("body")
head = soup.find("head")
if head and before: if head and before:
script = soup.new_tag("script") script = soup.new_tag("script")
script.string = before + '//# sourceURL=inject_pre.js' script.string = before + "//# sourceURL=inject_pre.js"
head.insert(0, script) head.insert(0, script)
if body and after: if body and after:
script = soup.new_tag("script") script = soup.new_tag("script")
script.string = after + '//# sourceURL=inject_post.js' script.string = after + "//# sourceURL=inject_post.js"
body.append(script) body.append(script)
# TODO embed remote resources in case we want the entire file to be # TODO embed remote resources in case we want the entire file to be
@ -203,12 +211,12 @@ def to_data_uri(filename, mime_type=None):
"""Create a data URI from the contents of a file""" """Create a data URI from the contents of a file"""
try: try:
data = open(filename, 'br').read() data = open(filename, "br").read()
except FileNotFoundError as e: except FileNotFoundError as e:
logger.error(str(e)) logger.error(str(e))
data = base64.b64encode(data) data = base64.b64encode(data)
if not mime_type: if not mime_type:
mime_type = 'application/octet-stream' mime_type = "application/octet-stream"
return "data:%s;charset=utf-8;base64, %s" % ( return "data:%s;charset=utf-8;base64, %s" % (
mime_type, mime_type,
data.decode(), data.decode(),
@ -225,61 +233,63 @@ def embed_css_resources(css, filename):
# First, make sure all @import's are using url(), because these are both valid: # First, make sure all @import's are using url(), because these are both valid:
# @import url("foo.css"); # @import url("foo.css");
# @import "foo.css"; # @import "foo.css";
regex = rb'''(?P<rule>@import\s*['"]?(?P<url>.*?)['"]?\s*;)''' regex = rb"""(?P<rule>@import\s*['"]?(?P<url>.*?)['"]?\s*;)"""
replace_rules = {} replace_rules = {}
for m in re.finditer(regex, css, flags=re.IGNORECASE): for m in re.finditer(regex, css, flags=re.IGNORECASE):
if not m['url'].lower().startswith(b'url('): if not m["url"].lower().startswith(b"url("):
replace_rules[m['rule']] = b"@import url('%s');" % m['url'] replace_rules[m["rule"]] = b"@import url('%s');" % m["url"]
for orig, new in replace_rules.items(): for orig, new in replace_rules.items():
css = css.replace(orig, new) css = css.replace(orig, new)
# Quotes are optional. But then URLs can contain escaped characters. # Quotes are optional. But then URLs can contain escaped characters.
regex = ( regex = (
rb'''(?P<url_statement>url\(['"]?(?P<url>.*?)['"]?\))''' rb"""(?P<url_statement>url\(['"]?(?P<url>.*?)['"]?\))"""
rb'''(\s*format\(['"](?P<format>.*?)['"]\))?''' rb"""(\s*format\(['"](?P<format>.*?)['"]\))?"""
) )
replace_rules = {} replace_rules = {}
for m in re.finditer(regex, css, flags=re.IGNORECASE): for m in re.finditer(regex, css, flags=re.IGNORECASE):
if re.match(b'''['"]?data:.*''', m['url']): if re.match(b"""['"]?data:.*""", m["url"]):
continue continue
path = m['url'].decode() path = m["url"].decode()
if '?' in path: if "?" in path:
path = path.split('?')[0] path = path.split("?")[0]
if '#' in path: if "#" in path:
path = path.split('#')[0] path = path.split("#")[0]
path = os.path.dirname(filename) + '/' + path path = os.path.dirname(filename) + "/" + path
try: try:
content = open(path, 'rb').read() content = open(path, "rb").read()
except FileNotFoundError as e: except FileNotFoundError as e:
logger.error(str(e)) logger.error(str(e))
continue continue
# If it's binary, determine mime type and encode in base64 # If it's binary, determine mime type and encode in base64
if m['format']: if m["format"]:
mime_type = 'font/' + m['format'].decode() mime_type = "font/" + m["format"].decode()
elif path[-3:].lower() == 'eot': elif path[-3:].lower() == "eot":
mime_type = 'font/eot' mime_type = "font/eot"
elif path[-3:].lower() == 'css': elif path[-3:].lower() == "css":
mime_type = 'text/css' mime_type = "text/css"
content = embed_css_resources(content, filename) content = embed_css_resources(content, filename)
else: else:
mime_type = mime_type_from_bytes(filename, content) mime_type = mime_type_from_bytes(filename, content)
if not mime_type: if not mime_type:
logger.error('Unable to determine mime type: %s' % path) logger.error("Unable to determine mime type: %s" % path)
mime_type = 'application/octet-stream' mime_type = "application/octet-stream"
content = base64.b64encode(content) content = base64.b64encode(content)
replace_rules[m['url_statement']] = \ replace_rules[m["url_statement"]] = (
b'url("data:%(mime_type)s;charset=utf-8;base64, %(content)s")' % { b'url("data:%(mime_type)s;charset=utf-8;base64, %(content)s")'
b'content': content, % {
b'mime_type': mime_type.encode(), b"content": content,
} b"mime_type": mime_type.encode(),
}
)
for orig, new in replace_rules.items(): for orig, new in replace_rules.items():
css = css.replace(orig, new) css = css.replace(orig, new)
@ -295,10 +305,9 @@ def mime_type_from_bytes(filename, buffer):
if not mime_type: if not mime_type:
logger.error( logger.error(
"Unknown mime type (%s): %s" % "Unknown mime type (%s): %s" % (filename, str(buffer[:10]) + "...")
(filename, str(buffer[:10]) + '...')
) )
mime_type = 'application/octet-stream' mime_type = "application/octet-stream"
return mime_type return mime_type
@ -308,7 +317,7 @@ def load_filetree(base_dir, before=None, after=None, exclude_pattern=None):
result = {} result = {}
base_dir = Path(base_dir) base_dir = Path(base_dir)
for path in base_dir.rglob('*'): for path in base_dir.rglob("*"):
if exclude_pattern and fnmatch(path.name, exclude_pattern): if exclude_pattern and fnmatch(path.name, exclude_pattern):
continue continue
if path.is_file(): if path.is_file():
@ -318,7 +327,7 @@ def load_filetree(base_dir, before=None, after=None, exclude_pattern=None):
before, before,
after, after,
) )
logger.debug('Packed file %s [%d]' % (key, len(result[key]))) logger.debug("Packed file %s [%d]" % (key, len(result[key])))
return result return result
@ -329,18 +338,20 @@ def extract_assets(input_path, output_path=None):
Import for debugging""" Import for debugging"""
if not output_path: if not output_path:
output_path = '.' output_path = "."
html = open(input_path, 'r').read() html = open(input_path, "r").read()
try: try:
# Find large base64 blob # Find large base64 blob
m = re.search('.*<script>.*window.*"(?P<blob>[A-Za-z0-9/+]{128,})".*</script>.*', html) m = re.search(
blob = m['blob'] '.*<script>.*window.*"(?P<blob>[A-Za-z0-9/+]{128,})".*</script>.*', html
)
blob = m["blob"]
blob = base64.b64decode(blob) blob = base64.b64decode(blob)
blob = zlib.decompress(blob).decode() blob = zlib.decompress(blob).decode()
blob = json.loads(blob) blob = json.loads(blob)
file_tree = blob['file_tree'] file_tree = blob["file_tree"]
except Exception as e: except Exception as e:
logger.error(str(e)) logger.error(str(e))
logger.error("Does not look like a Zundler output file: %s" % input_path) logger.error("Does not look like a Zundler output file: %s" % input_path)
@ -348,14 +359,14 @@ def extract_assets(input_path, output_path=None):
for filename, file in file_tree.items(): for filename, file in file_tree.items():
filename = os.path.join(output_path, filename) filename = os.path.join(output_path, filename)
os.makedirs(os.path.dirname(filename) or '.', exist_ok=True) os.makedirs(os.path.dirname(filename) or ".", exist_ok=True)
data = file['data'] data = file["data"]
if file['base64encoded']: if file["base64encoded"]:
data = base64.b64decode(data) data = base64.b64decode(data)
else: else:
data = data.encode() data = data.encode()
open(filename, 'wb').write(data) open(filename, "wb").write(data)
file['data'] = file['data'][:100] + '...' file["data"] = file["data"][:100] + "..."
with open(os.path.join(output_path, 'file_tree.json'), 'w') as fp: with open(os.path.join(output_path, "file_tree.json"), "w") as fp:
json.dump(file_tree, fp, indent=2) json.dump(file_tree, fp, indent=2)

View File

@ -8,12 +8,12 @@ from sphinx.util import logging, progress_message
from sphinx.util.osutil import relpath from sphinx.util.osutil import relpath
__ = get_translation(__name__, 'console') __ = get_translation(__name__, "console")
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
class ZundlerBuilder(StandaloneHTMLBuilder): class ZundlerBuilder(StandaloneHTMLBuilder):
name = 'zundler' name = "zundler"
epilog = "" epilog = ""
def __init__(self, app, env=None): def __init__(self, app, env=None):
@ -22,9 +22,8 @@ class ZundlerBuilder(StandaloneHTMLBuilder):
else: else:
super().__init__(app) super().__init__(app)
self.epilog = ( self.epilog = "Your self-contained HTML file is now in %s." % relpath(
'Your self-contained HTML file is now in %s.' % self.app.original_outdir
relpath(self.app.original_outdir)
) )
def finish(self): def finish(self):
@ -38,15 +37,15 @@ class ZundlerBuilder(StandaloneHTMLBuilder):
input_path = os.path.join( input_path = os.path.join(
self.outdir, self.outdir,
root_doc + '.html', root_doc + ".html",
) )
output_path = os.path.join( output_path = os.path.join(
self.app.original_outdir, self.app.original_outdir,
root_doc + '.html', root_doc + ".html",
) )
with progress_message(__('embedding HTML assets')): with progress_message(__("embedding HTML assets")):
embed_assets( embed_assets(
input_path, input_path,
output_path=output_path, output_path=output_path,
@ -60,11 +59,11 @@ def setup(app):
# then $builddir/$target second. # then $builddir/$target second.
outdir = os.path.join( outdir = os.path.join(
os.path.dirname(app.outdir), os.path.dirname(app.outdir),
'html', "html",
) )
doctreedir = os.path.join( doctreedir = os.path.join(
os.path.dirname(app.outdir), os.path.dirname(app.outdir),
'doctree', "doctree",
) )
app.original_outdir = app.outdir app.original_outdir = app.outdir
@ -76,27 +75,27 @@ def setup(app):
Path(app.outdir).mkdir(parents=True, exist_ok=True) Path(app.outdir).mkdir(parents=True, exist_ok=True)
app.add_config_value( app.add_config_value(
'zundler_root_doc', "zundler_root_doc",
None, None,
'', "",
) )
app.add_config_value( app.add_config_value(
'zundler_append_pre', "zundler_append_pre",
'', "",
'', "",
) )
app.add_config_value( app.add_config_value(
'zundler_append_post', "zundler_append_post",
'', "",
'', "",
) )
app.add_builder(ZundlerBuilder) app.add_builder(ZundlerBuilder)
return { return {
'version': '0.1', "version": "0.1",
'parallel_read_safe': True, "parallel_read_safe": True,
'parallel_write_safe': True, "parallel_write_safe": True,
} }