Apply black

This commit is contained in:
Adrian Vollmer 2024-03-31 10:56:04 +02:00
parent c58206b892
commit 0bde114839
4 changed files with 140 additions and 120 deletions

View File

@@ -1,8 +1,10 @@
def main():
from .args import parse_args
args = parse_args()
from .embed import embed_assets, extract_assets
if args.revert:
extract_assets(
args.input_path,

View File

@@ -10,7 +10,9 @@ except ImportError:
try:
__version__ = importlib_metadata.version(__package__ or __name__)
# __summary__ = importlib_metadata.metadata(__package__ or __name__)['summary']
__summary__ = "Bundle assets of distributed HTML docs into one self-contained HTML file"
__summary__ = (
"Bundle assets of distributed HTML docs into one self-contained HTML file"
)
except importlib_metadata.PackageNotFoundError:
__version__ = "??"
__summary__ = "??"
@@ -21,40 +23,46 @@ parser = argparse.ArgumentParser(
)
parser.add_argument(
'-v', '--version', action='version',
"-v",
"--version",
action="version",
version=__version__,
)
parser.add_argument(
'-R', '--revert',
"-R",
"--revert",
default=False,
action='store_true',
help='set this flag for the reverse operation: deflate a Zundler file'
' into its components (ouput_path must be a directory; "." by default)',
action="store_true",
help="set this flag for the reverse operation: deflate a Zundler file"
' into its components (ouput_path must be a directory; "." by default)',
)
parser.add_argument(
'-P', '--append-pre',
"-P",
"--append-pre",
default="",
help='append JS code to inject_pre script (default: %(default)s)',
help="append JS code to inject_pre script (default: %(default)s)",
)
parser.add_argument(
'-p', '--append-post',
"-p",
"--append-post",
default="",
help='append JS code to inject_post script (default: %(default)s)',
help="append JS code to inject_post script (default: %(default)s)",
)
parser.add_argument(
'input_path',
help='input path to the root HTML file',
"input_path",
help="input path to the root HTML file",
)
parser.add_argument(
'-o', '--output-path',
help='output path to resulting HTML file',
"-o",
"--output-path",
help="output path to resulting HTML file",
)

View File

@@ -40,33 +40,32 @@ logger = logging.getLogger(__name__)
def embed_assets(index_file, output_path=None, append_pre="", append_post=""):
init_files = {}
for filename in [
'init.css',
'init.html',
'bootstrap.js',
'main.js',
'inject_pre.js',
'inject_post.js',
'pako.min.js',
'LICENSE',
"init.css",
"init.html",
"bootstrap.js",
"main.js",
"inject_pre.js",
"inject_post.js",
"pako.min.js",
"LICENSE",
]:
path = os.path.join(SCRIPT_PATH, 'assets', filename)
init_files[filename] = open(path, 'r').read()
path = os.path.join(SCRIPT_PATH, "assets", filename)
init_files[filename] = open(path, "r").read()
if not os.path.exists(index_file):
raise FileNotFoundError('no such file: %s' % index_file)
raise FileNotFoundError("no such file: %s" % index_file)
base_dir = os.path.dirname(index_file)
base_name = os.path.basename(index_file)
new_base_name = 'SELF_CONTAINED_' + base_name
new_base_name = "SELF_CONTAINED_" + base_name
if not output_path:
output_path = os.path.join(base_dir, new_base_name)
before = init_files['inject_pre.js'] + append_pre
after = init_files['inject_post.js'] + append_post
before = init_files["inject_pre.js"] + append_pre
after = init_files["inject_post.js"] + append_post
file_tree = load_filetree(
base_dir,
before=before,
@@ -77,16 +76,16 @@ def embed_assets(index_file, output_path=None, append_pre="", append_post=""):
remote_resources = []
global_context = {
'current_path': base_name,
'file_tree': file_tree,
'remote_resources': remote_resources,
'main': init_files['main.js'],
"current_path": base_name,
"file_tree": file_tree,
"remote_resources": remote_resources,
"main": init_files["main.js"],
}
global_context = json.dumps(global_context)
logger.debug('total asset size: %d' % len(global_context))
logger.debug("total asset size: %d" % len(global_context))
global_context = deflate(global_context)
logger.debug('total asset size (compressed): %d' % len(global_context))
logger.debug("total asset size (compressed): %d" % len(global_context))
result = """
<!DOCTYPE html>
@@ -98,18 +97,18 @@ def embed_assets(index_file, output_path=None, append_pre="", append_post=""):
<script>{bootstrap} //# sourceURL=boostrap.js</script>
</body><!-- {license} --></html>
""".format(
style=init_files['init.css'],
body=init_files['init.html'],
pako=init_files['pako.min.js'],
bootstrap=init_files['bootstrap.js'],
style=init_files["init.css"],
body=init_files["init.html"],
pako=init_files["pako.min.js"],
bootstrap=init_files["bootstrap.js"],
global_context=global_context,
license=init_files['LICENSE'],
license=init_files["LICENSE"],
)
with open(output_path, 'w') as fp:
with open(output_path, "w") as fp:
fp.write(result)
logger.info('Result written to: %s' % output_path)
logger.info("Result written to: %s" % output_path)
return output_path
@@ -128,22 +127,30 @@ def prepare_file(filename, before, after):
"""
_, ext = os.path.splitext(filename)
ext = ext.lower()[1:]
data = open(filename, 'rb').read()
data = open(filename, "rb").read()
mime_type = mime_type_from_bytes(filename, data)
base64encoded = False
if ext == 'css':
if ext == "css":
# assuming all CSS files have names ending in '.css'
data = embed_css_resources(data, filename)
elif ext in [
'png', 'jpg', 'jpeg', 'woff', 'woff2', 'eot', 'ttf', 'gif', 'ico',
"png",
"jpg",
"jpeg",
"woff",
"woff2",
"eot",
"ttf",
"gif",
"ico",
]:
# JSON doesn't allow binary data
data = base64.b64encode(data)
base64encoded = True
elif ext in ['html', 'htm']:
elif ext in ["html", "htm"]:
data = embed_html_resources(
data,
os.path.dirname(filename),
@@ -157,12 +164,12 @@ def prepare_file(filename, before, after):
except UnicodeError:
data = base64.b64encode(data).decode()
logger.debug('loaded file: %s [%s, %d bytes]' % (filename, mime_type, len(data)))
logger.debug("loaded file: %s [%s, %d bytes]" % (filename, mime_type, len(data)))
result = {
'data': data,
'mime_type': mime_type,
'base64encoded': base64encoded,
"data": data,
"mime_type": mime_type,
"base64encoded": base64encoded,
}
return result
@@ -179,18 +186,19 @@ def embed_html_resources(html, base_dir, before, after):
# This cannot be done in JavaScript, it would be too late
import bs4
soup = bs4.BeautifulSoup(html, 'lxml')
body = soup.find('body')
head = soup.find('head')
soup = bs4.BeautifulSoup(html, "lxml")
body = soup.find("body")
head = soup.find("head")
if head and before:
script = soup.new_tag("script")
script.string = before + '//# sourceURL=inject_pre.js'
script.string = before + "//# sourceURL=inject_pre.js"
head.insert(0, script)
if body and after:
script = soup.new_tag("script")
script.string = after + '//# sourceURL=inject_post.js'
script.string = after + "//# sourceURL=inject_post.js"
body.append(script)
# TODO embed remote resources in case we want the entire file to be
@@ -203,12 +211,12 @@ def to_data_uri(filename, mime_type=None):
"""Create a data URI from the contents of a file"""
try:
data = open(filename, 'br').read()
data = open(filename, "br").read()
except FileNotFoundError as e:
logger.error(str(e))
data = base64.b64encode(data)
if not mime_type:
mime_type = 'application/octet-stream'
mime_type = "application/octet-stream"
return "data:%s;charset=utf-8;base64, %s" % (
mime_type,
data.decode(),
@@ -225,61 +233,63 @@ def embed_css_resources(css, filename):
# First, make sure all @import's are using url(), because these are both valid:
# @import url("foo.css");
# @import "foo.css";
regex = rb'''(?P<rule>@import\s*['"]?(?P<url>.*?)['"]?\s*;)'''
regex = rb"""(?P<rule>@import\s*['"]?(?P<url>.*?)['"]?\s*;)"""
replace_rules = {}
for m in re.finditer(regex, css, flags=re.IGNORECASE):
if not m['url'].lower().startswith(b'url('):
replace_rules[m['rule']] = b"@import url('%s');" % m['url']
if not m["url"].lower().startswith(b"url("):
replace_rules[m["rule"]] = b"@import url('%s');" % m["url"]
for orig, new in replace_rules.items():
css = css.replace(orig, new)
# Quotes are optional. But then URLs can contain escaped characters.
regex = (
rb'''(?P<url_statement>url\(['"]?(?P<url>.*?)['"]?\))'''
rb'''(\s*format\(['"](?P<format>.*?)['"]\))?'''
rb"""(?P<url_statement>url\(['"]?(?P<url>.*?)['"]?\))"""
rb"""(\s*format\(['"](?P<format>.*?)['"]\))?"""
)
replace_rules = {}
for m in re.finditer(regex, css, flags=re.IGNORECASE):
if re.match(b'''['"]?data:.*''', m['url']):
if re.match(b"""['"]?data:.*""", m["url"]):
continue
path = m['url'].decode()
path = m["url"].decode()
if '?' in path:
path = path.split('?')[0]
if '#' in path:
path = path.split('#')[0]
if "?" in path:
path = path.split("?")[0]
if "#" in path:
path = path.split("#")[0]
path = os.path.dirname(filename) + '/' + path
path = os.path.dirname(filename) + "/" + path
try:
content = open(path, 'rb').read()
content = open(path, "rb").read()
except FileNotFoundError as e:
logger.error(str(e))
continue
# If it's binary, determine mime type and encode in base64
if m['format']:
mime_type = 'font/' + m['format'].decode()
elif path[-3:].lower() == 'eot':
mime_type = 'font/eot'
elif path[-3:].lower() == 'css':
mime_type = 'text/css'
if m["format"]:
mime_type = "font/" + m["format"].decode()
elif path[-3:].lower() == "eot":
mime_type = "font/eot"
elif path[-3:].lower() == "css":
mime_type = "text/css"
content = embed_css_resources(content, filename)
else:
mime_type = mime_type_from_bytes(filename, content)
if not mime_type:
logger.error('Unable to determine mime type: %s' % path)
mime_type = 'application/octet-stream'
logger.error("Unable to determine mime type: %s" % path)
mime_type = "application/octet-stream"
content = base64.b64encode(content)
replace_rules[m['url_statement']] = \
b'url("data:%(mime_type)s;charset=utf-8;base64, %(content)s")' % {
b'content': content,
b'mime_type': mime_type.encode(),
}
replace_rules[m["url_statement"]] = (
b'url("data:%(mime_type)s;charset=utf-8;base64, %(content)s")'
% {
b"content": content,
b"mime_type": mime_type.encode(),
}
)
for orig, new in replace_rules.items():
css = css.replace(orig, new)
@@ -295,10 +305,9 @@ def mime_type_from_bytes(filename, buffer):
if not mime_type:
logger.error(
"Unknown mime type (%s): %s" %
(filename, str(buffer[:10]) + '...')
"Unknown mime type (%s): %s" % (filename, str(buffer[:10]) + "...")
)
mime_type = 'application/octet-stream'
mime_type = "application/octet-stream"
return mime_type
@@ -308,7 +317,7 @@ def load_filetree(base_dir, before=None, after=None, exclude_pattern=None):
result = {}
base_dir = Path(base_dir)
for path in base_dir.rglob('*'):
for path in base_dir.rglob("*"):
if exclude_pattern and fnmatch(path.name, exclude_pattern):
continue
if path.is_file():
@@ -318,7 +327,7 @@ def load_filetree(base_dir, before=None, after=None, exclude_pattern=None):
before,
after,
)
logger.debug('Packed file %s [%d]' % (key, len(result[key])))
logger.debug("Packed file %s [%d]" % (key, len(result[key])))
return result
@@ -329,18 +338,20 @@ def extract_assets(input_path, output_path=None):
Import for debugging"""
if not output_path:
output_path = '.'
output_path = "."
html = open(input_path, 'r').read()
html = open(input_path, "r").read()
try:
# Find large base64 blob
m = re.search('.*<script>.*window.*"(?P<blob>[A-Za-z0-9/+]{128,})".*</script>.*', html)
blob = m['blob']
m = re.search(
'.*<script>.*window.*"(?P<blob>[A-Za-z0-9/+]{128,})".*</script>.*', html
)
blob = m["blob"]
blob = base64.b64decode(blob)
blob = zlib.decompress(blob).decode()
blob = json.loads(blob)
file_tree = blob['file_tree']
file_tree = blob["file_tree"]
except Exception as e:
logger.error(str(e))
logger.error("Does not look like a Zundler output file: %s" % input_path)
@@ -348,14 +359,14 @@ def extract_assets(input_path, output_path=None):
for filename, file in file_tree.items():
filename = os.path.join(output_path, filename)
os.makedirs(os.path.dirname(filename) or '.', exist_ok=True)
data = file['data']
if file['base64encoded']:
os.makedirs(os.path.dirname(filename) or ".", exist_ok=True)
data = file["data"]
if file["base64encoded"]:
data = base64.b64decode(data)
else:
data = data.encode()
open(filename, 'wb').write(data)
file['data'] = file['data'][:100] + '...'
open(filename, "wb").write(data)
file["data"] = file["data"][:100] + "..."
with open(os.path.join(output_path, 'file_tree.json'), 'w') as fp:
with open(os.path.join(output_path, "file_tree.json"), "w") as fp:
json.dump(file_tree, fp, indent=2)

View File

@@ -8,12 +8,12 @@ from sphinx.util import logging, progress_message
from sphinx.util.osutil import relpath
__ = get_translation(__name__, 'console')
__ = get_translation(__name__, "console")
logger = logging.getLogger(__name__)
class ZundlerBuilder(StandaloneHTMLBuilder):
name = 'zundler'
name = "zundler"
epilog = ""
def __init__(self, app, env=None):
@@ -22,9 +22,8 @@ class ZundlerBuilder(StandaloneHTMLBuilder):
else:
super().__init__(app)
self.epilog = (
'Your self-contained HTML file is now in %s.' %
relpath(self.app.original_outdir)
self.epilog = "Your self-contained HTML file is now in %s." % relpath(
self.app.original_outdir
)
def finish(self):
@@ -38,15 +37,15 @@ class ZundlerBuilder(StandaloneHTMLBuilder):
input_path = os.path.join(
self.outdir,
root_doc + '.html',
root_doc + ".html",
)
output_path = os.path.join(
self.app.original_outdir,
root_doc + '.html',
root_doc + ".html",
)
with progress_message(__('embedding HTML assets')):
with progress_message(__("embedding HTML assets")):
embed_assets(
input_path,
output_path=output_path,
@@ -60,11 +59,11 @@ def setup(app):
# then $builddir/$target second.
outdir = os.path.join(
os.path.dirname(app.outdir),
'html',
"html",
)
doctreedir = os.path.join(
os.path.dirname(app.outdir),
'doctree',
"doctree",
)
app.original_outdir = app.outdir
@@ -76,27 +75,27 @@ def setup(app):
Path(app.outdir).mkdir(parents=True, exist_ok=True)
app.add_config_value(
'zundler_root_doc',
"zundler_root_doc",
None,
'',
"",
)
app.add_config_value(
'zundler_append_pre',
'',
'',
"zundler_append_pre",
"",
"",
)
app.add_config_value(
'zundler_append_post',
'',
'',
"zundler_append_post",
"",
"",
)
app.add_builder(ZundlerBuilder)
return {
'version': '0.1',
'parallel_read_safe': True,
'parallel_write_safe': True,
"version": "0.1",
"parallel_read_safe": True,
"parallel_write_safe": True,
}