Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- "use strict";
- /*
- Graphics pasted from the clipboard into the TinyMCE rich text editor will be inserted
- into the HTML source code of the edited document as an <img src="data:image/png;base64,..."> tag,
- with the base64 encoded data of the image in its 'src' attribute.
- For several reasons, it is better to have the images in separate files instead,
- which are just referred to in the HTML source code by a relative URL.
- This script
- - finds the base64 inline images
- - extracts the base64 data and saves it as a file with an auto-generated name
- - replaces the src attribute in the HTML code with this filename
- - saves the adapted HTML file
- We used nodejs with the package 'node-html-parser', which is very easy to use for such tasks
- */
- const fs = require('fs').promises;
- const html = require('node-html-parser');
// Directory the extracted image files are written to (absolute Windows path, trailing separator required
// because file names are appended by plain string concatenation).
const IMGDIR = 'C:\\Temp\\sample\\';
// Same directory as seen from the HTML file location. This value is written into the `src`
// attributes, i.e. it is a URL, so it uses a forward slash rather than a Windows backslash.
const IMGDIR_RELATIVE = 'sample/';
// Absolute path of the HTML file containing base64 encoded images.
const HTMLFILE = 'C:\\Temp\\sample.html';

// Kick off the conversion; run() reports its own errors.
run();
/**
 * Entry point of the script.
 *
 * Reads HTMLFILE, lets extractImages() pull out the inline base64 images,
 * writes every extracted image into IMGDIR and finally prints some statistics.
 * All image writes are awaited before the statistics are printed.
 *
 * @returns {Promise<void>} Resolves when all work is done. Errors are logged, never rethrown.
 */
async function run() {
  try {
    // Read the HTML file
    const fileContent = await fs.readFile(HTMLFILE, 'utf8');

    // Extract images and replace the src attributes by their relative URL
    const images = extractImages(fileContent);

    // Save the images in parallel; saveToFile handles its own errors,
    // so one failing file does not abort the others.
    await Promise.all(images.map(([fileName, data]) => saveToFile(fileName, data)));

    // Some statistics
    console.log("Total # of images: " + images.length);
    console.log(
      "Total amount of base64 characters: " +
        images.reduce((acc, [, data]) => acc + data.length, 0)
    );
  } catch (err) {
    console.error(err);
  }

  /**
   * Saves one image below IMGDIR.
   * The decoding "base64 -> binary image" is left to writeFile via the encoding option.
   *
   * @param {string} fileName - File name relative to IMGDIR.
   * @param {string} data - Base64 encoded image data.
   * @returns {Promise<void>} Resolves once the write finished or its failure was logged.
   */
  async function saveToFile(fileName, data) {
    try {
      // `fs` is the promise-based API: it takes no callback, the result must be awaited.
      await fs.writeFile(IMGDIR + fileName, data, { encoding: 'base64' });
    } catch (err) {
      console.error('Error:', err);
    }
  }
}
/**
 * Extracts the base64 encoded inline images of an HTML document.
 *
 * For every <img> whose src is a base64 data URI, the src attribute is replaced by
 * IMGDIR_RELATIVE + generated file name. <img> elements with any other src are left untouched.
 * As a side effect the modified document is written to HTMLFILE + ".new"; that write is
 * started but not awaited, and a failure is only logged.
 *
 * @param {string} htmlContent - HTML source code to process.
 * @returns {Array<[string, string]>} Pairs [fileName, base64Data], one per extracted image.
 */
function extractImages(htmlContent) {
  const doc = html.parse(htmlContent);
  const images = [];
  let i = 0;

  for (const img of doc.querySelectorAll("img")) {
    // Every <img> is counted, so the number in a file name is the position of the tag in the document.
    i += 1;
    try {
      const pair = extractSingleImage(img, i);
      if (pair === null) continue; // ordinary image reference, nothing to extract
      const [fileName] = pair;
      images.push(pair);
      img.setAttribute("src", IMGDIR_RELATIVE + fileName);
    } catch (err) {
      console.log(err);
    }
  }

  // This function stays synchronous for its caller, so the write is fire-and-forget;
  // the handler keeps a failed write from becoming an unhandled rejection.
  fs.writeFile(HTMLFILE + ".new", doc.toString()).catch((err) => console.error(err));

  return images;

  /**
   * Returns the pair [fileName, data] extracted from a single <img> element.
   * The name has the form "imageNNNN.png" (for example), derived from MIME type "image/png".
   *
   * @param {object} img - Parsed <img> element.
   * @param {number} i - Running number used in the generated file name.
   * @returns {[string, string] | null} The pair, or null if src is not a base64 data URI.
   * @throws {Error} If the element has no src attribute.
   */
  function extractSingleImage(img, i) {
    const src = img.getAttribute("src");
    if (!src) {
      throw new Error("No src attribute for IMG tag:" + String(img).substring(1, 100));
    }

    const match = src.match(/data:([^;]*);base64,\s*(.*)/);
    if (match === null) return null;

    const [, mimeType, data] = match;
    // "image/png" -> "image0001.png": the slash is replaced by the zero-padded number plus a dot.
    const fileName = mimeType.replace(/\//, String(i).padStart(4, "0") + ".");
    return [fileName, data];
  }
}
Add Comment
Please, Sign In to add comment