""" Embed HTML assets. It creates an HTML file that has three script tags: 1. A virtual file tree containing all assets in zipped form 2. The pako JS library to unzip the assets 3. Some boostrap code that fixes the HTML so it loads all assets from the virtual file tree instead of the file system Also, two scripts are injected into all HTML files in the file tree. One as the first child of
def embed_assets(index_file, output_path=None):
    """Bundle `index_file` and the assets next to it into a single HTML file.

    `index_file`: Path of the HTML entry point. Its directory is handed to
      `load_filetree` as the root of the asset tree.
    `output_path`: Where to write the result. If falsy, the file is written
      next to `index_file` as 'SELF_CONTAINED_' + its base name.

    Returns the path the result was written to.

    Raises `FileNotFoundError` if `index_file` does not exist.
    """
    # Helper assets shipped alongside this module.
    init_files = {}
    for filename in [
        'init.js',
        'inject.js',
        'init.css',
        'init.html',
        'monkeypatch.js',
        'pako.min.js',
        'LICENSE',
    ]:
        path = os.path.join(SCRIPT_PATH, 'assets', filename)
        # Close the handle deterministically and do not depend on the
        # locale's default encoding for the bundled text assets.
        with open(path, 'r', encoding='utf-8') as fp:
            init_files[filename] = fp.read()

    if not os.path.exists(index_file):
        raise FileNotFoundError('no such file: %s' % index_file)

    base_dir = os.path.dirname(index_file)
    base_name = os.path.basename(index_file)
    new_base_name = 'SELF_CONTAINED_' + base_name

    if not output_path:
        output_path = os.path.join(base_dir, new_base_name)

    # The default output name is passed as the exclude pattern -- presumably
    # so that a previously generated bundle is not embedded into the new one
    # (confirm against load_filetree).
    file_tree = load_filetree(
        base_dir,
        before=init_files['monkeypatch.js'],
        after=init_files['inject.js'],
        exclude_pattern=new_base_name,
    )
    file_tree = json.dumps(file_tree)
    logger.debug('total asset size: %d', len(file_tree))
    file_tree = deflate(file_tree)
    logger.debug('total asset size (compressed): %d', len(file_tree))

    remote_resources = []

    global_context = {
        'current_path': base_name,
        'file_tree': file_tree,
        'remote_resources': remote_resources,
    }
    global_context = json.dumps(global_context)

    # NOTE(review): as visible here the template only references {body}; the
    # remaining keyword arguments are accepted by str.format() but unused.
    # Confirm that the surrounding markup was not lost.
    result = """ {body} """.format(
        style=init_files['init.css'],
        init_js=init_files['init.js'],
        pako=init_files['pako.min.js'],
        body=init_files['init.html'],
        global_context=global_context,
        license=init_files['LICENSE'],
    )

    # Write with the same explicit encoding that was used for reading.
    with open(output_path, 'w', encoding='utf-8') as fp:
        fp.write(result)

    logger.info('Result written to: %s', output_path)

    return output_path
def prepare_file(filename, before, after):
    """Prepare a file for the file tree

    Referenced assets in CSS files will be embedded.
    HTML files will be injected with two scripts.

    `filename`: The name of the file
    `before`: Javascript code that will be inserted as the first child of
      `<head>` if the file is HTML.
    `after`: Javascript code that will be inserted as the last child of
      `<body>` if the file is HTML.

    Returns a dict with the keys 'data' (always a `str`), 'mime_type' and
    'base64encoded' (whether 'data' holds base64 text instead of the
    literal file contents).
    """
    _, ext = os.path.splitext(filename)
    ext = ext.lower()[1:]
    with open(filename, 'rb') as fp:
        data = fp.read()
    mime_type = mime_type_from_bytes(data)
    base64encoded = False

    if ext == 'css':
        # assuming all CSS files have names ending in '.css'
        data = embed_css_resources(data, filename)

    elif ext in [
        'png',
        'jpg',
        'jpeg',
        'woff',
        'woff2',
        'eot',
        'ttf',
        'gif',
        'ico',
    ]:
        # JSON doesn't allow binary data
        data = base64.b64encode(data)
        base64encoded = True

    elif ext in ['html', 'htm']:
        data = embed_html_resources(
            data,
            os.path.dirname(filename),
            before,
            after,
        ).encode()

    if not isinstance(data, str):
        try:
            data = data.decode()
        except UnicodeError:
            # Binary content with an extension not listed above: fall back
            # to base64 and record that, so the flag matches the payload.
            data = base64.b64encode(data).decode()
            base64encoded = True

    logger.debug(
        'loaded file: %s [%s, %d bytes]', filename, mime_type, len(data)
    )

    result = {
        'data': data,
        'mime_type': mime_type,
        'base64encoded': base64encoded,
    }

    return result


def deflate(data):
    """Compress the string `data` with zlib and return it as base64 text"""
    data = zlib.compress(data.encode())
    data = base64.b64encode(data).decode()
    return data


def embed_html_resources(html, base_dir, before, after):
    """Inject the `before` and `after` scripts into an HTML document

    `html`: The HTML document; it is passed to BeautifulSoup as is.
    `base_dir`: Currently unused.
    `before`: Javascript code inserted as the first child of `<head>`.
    `after`: Javascript code appended as the last child of `<body>`.

    A script is skipped if it is empty or if the parsed document has no
    matching `<head>`/`<body>` element. Returns the document as a `str`.
    """
    # This cannot be done in JavaScript, it would be too late
    import bs4

    soup = bs4.BeautifulSoup(html, 'lxml')
    body = soup.find('body')
    head = soup.find('head')

    if head and before:
        script = soup.new_tag("script")
        script.string = before
        head.insert(0, script)

    if body and after:
        script = soup.new_tag("script")
        script.string = after
        body.append(script)

    # TODO embed remote resources in case we want the entire file to be
    # usable in an offline environment

    return str(soup)


def to_data_uri(filename, mime_type=None):
    """Create a data URI from the contents of a file

    `filename`: Path of the file whose bytes are embedded.
    `mime_type`: MIME type to put into the URI; if falsy,
      'application/octet-stream' is used.
    """
    with open(filename, 'rb') as fp:
        data = base64.b64encode(fp.read())
    if not mime_type:
        mime_type = 'application/octet-stream'
    return "data:%s;charset=utf-8;base64, %s" % (
        mime_type,
        data.decode(),
    )