1

Background:
I am try to fill in a pdf file that:
a) has form fields.
b) that is stored on Google Drive.

with data that is stored in Google spreadsheet.

I am using:

  1. Google Apps Script.
  2. HtmlService
  3. PDF-lib.js in a htmlOutput object generated from a htmlTemplate.

The work flow is:

  1. The showModalDialog_downloadFilledPDFform_NAMOFFORM() function is called from a menu.

The function is:

function showModalDialog_downloadFilledPDFform_NAMOFFORM() {
  var pdf_template_file_url = getPDFfileURL("form1.pdf");
  var htmlTemplate = HtmlService.createTemplateFromFile('downloadFilledPDFformHTML');
  
  htmlTemplate.dataFromServerTemplate = { pdf_template_file: "form1.pdf", pdf_template_file_url: pdf_template_file_url };
  
  var htmlOutput = htmlTemplate.evaluate();
  htmlOutput.setWidth(648.1);
  htmlOutput.setHeight(286.300)
  SpreadsheetApp.getUi().showModalDialog(htmlOutput, 'Download filled PDF');
}

The url that is passed to the htmlTemplate is generated by: "fillPDFForm.gs"

function: fillPDFForm.gs:

var pdfFileNamesAndIDs = [ ]
  pdfFileNamesAndIDs.push(["form1.pdf", "1y8F5NgnK50mdtWSR6v1b8pELsbbBJert"])
  pdfFileNamesAndIDs.push(["form2.pdf", "1B4BOaI-BqFmhmnFx7FaT-yys-U0vkYKz"])
  pdfFileNamesAndIDs.push(["form3.pdf", "17LrJpRA5oBZBqw-2du1H74KxWIX55qYC"])

function getPDFfileURL(fileName) {
  var documentID = "";
  for (var i in pdfFileNamesAndIDs) {
    //console.log(pdfFileNamesAndIDs[i][0]);
    if (pdfFileNamesAndIDs[i][0] == fileName) {
      documentID = pdfFileNamesAndIDs[i][1];
      console.log("documentID: " + documentID);
    }
  }

  var documentFile = DriveApp.getFileById(documentID);
  var documentURL = documentFile.getDownloadUrl();
  Logger.log("documentURL = "+documentURL);

  return documentURL;
}

The Problem:
The URL generated by getPDFfileURL() doesn't work in the html file generated in showModalDialog_downloadFilledPDFform_NAMOFFORM().

The error in Chrome dev console is:

pdf-lib@1.11.0:15 Uncaught (in promise) Error: Failed to parse PDF document (line:0 col:0 offset=0): No PDF header found
    at e [as constructor] (pdf-lib@1.11.0:15:189222)
    at new e (pdf-lib@1.11.0:15:190065)
    at e.parseHeader (pdf-lib@1.11.0:15:401731)
    at e.<anonymous> (pdf-lib@1.11.0:15:400782)
    at pdf-lib@1.11.0:15:1845
    at Object.next (pdf-lib@1.11.0:15:1950)
    at pdf-lib@1.11.0:15:887
    at new Promise (<anonymous>)
    at i (pdf-lib@1.11.0:15:632)
    at e.parseDocument (pdf-lib@1.11.0:15:400580)

The basic concept for the html page (shown as a modal dialog box), came from: https://jsfiddle.net/Hopding/0mwfqkv6/3/

The contents of the htmlTemplate are:

<!DOCTYPE html>
<html>
  <head>
    <base target="_top">

    <!-- Add Stylesheet -->
    <?!= HtmlService.createHtmlOutputFromFile('downloadFilledPDFformCSS').getContent(); ?>

    <!-- Add pdf-lib and downloadjs libraries -->
    <!-- https://pdf-lib.js.org/ and https://github.com/rndme/download -->
    <!-- https://jsfiddle.net/Hopding/0mwfqkv6/3/ -->
    <script src="https://unpkg.com/pdf-lib@1.11.0"></script>
    <script src="https://unpkg.com/downloadjs@1.4.7"></script>
  </head>

  <body>
    <h2 id="myTitle"></h2>
    <p>Click the button to fill form fields in an the following PDF document: <code id="pdf_template_file">pdf-lib</code></p>
    <button onclick="fillForm()">Fill PDF</button>
    <p class="small">(Your browser will download the resulting file)</p>
  </body>

  <script>
      const data = <?!= JSON.stringify(dataFromServerTemplate) ?>; //Stores the data directly in the javascript code

      function removeExtension(filename) {
        return filename.substring(0, filename.lastIndexOf('.')) || filename;
      }
      const pdf_template_file = data.pdf_template_file;
      const pdf_template_file_basename = removeExtension(pdf_template_file);

      // sample usage
      function initialize() {
          document.getElementById("myTitle").innerText = pdf_template_file;
          //or use jquery:  $("#myTitle").text(data.first + " - " + data.last);
          document.getElementById("pdf_template_file").innerText = pdf_template_file;
      }

      // use onload or use jquery to call your initialization after the document loads
      window.onload = initialize;
  </script>


  <script>
    const { PDFDocument } = PDFLib;

    async function fillForm() {
        // Fetch the PDF with form fields
      const formUrl = data.pdf_template_file_url
      //const formPdfBytes = await fetch(formUrl).then(res => res.arrayBuffer())
      const formPdfBytes = await fetch(formUrl, {
        redirect: "follow",
        mode: 'no-cors',
        method: 'GET',
        headers: {
          'Content-Type': 'application/pdf',
        }
      }).then(res => res.arrayBuffer());

      // Load a PDF with form fields
      const pdfDoc = await PDFDocument.load(formPdfBytes);

      // Get the form containing all the fields
      const form = pdfDoc.getForm()

      // Get all fields in the PDF by their names

      const invIDField = form.getTextField('invID')
      const makeAndModelField = form.getTextField('makeAndModel')
      const nameField = form.getTextField('name')
      const addressField = form.getTextField('address')
      const phoneNumberField = form.getTextField('phoneNumber')
      const emailAddressField = form.getTextField('emailAddress')
      const dateField = form.getTextField('date')

      // Output file name
      const INPUT_FNAME = "AN"
      const INPUT_LNAME = "Other"

      // Fill in the basic info fields
      invIDField.setText()
      makeAndModelField.setText()
      nameField.setText(INPUT_FNAME + " " + INPUT_LNAME)
      addressField.setText()
      phoneNumberField.setText()
      emailAddressField.setText()
      dateField.setText()

      // Serialize the PDFDocument to bytes (a Uint8Array)
      const pdfBytes = await pdfDoc.save({updateFieldAppearances: false})

      const outputPDFfilename = pdf_template_file_basename + "." + INPUT_FNAME + "_" + INPUT_LNAME + ".pdf"

            // Trigger the browser to download the PDF document
      download(pdfBytes, outputPDFfilename, "application/pdf");
    }
  </script>
</html>

I have replicated the contents of the html file on my testing webserver. The server has 3 files: index.html, stykesheet.css and form1.pdf

The pdf (on the web server) is the same pdf file that is stored on Google drive.

On my server the following works:

  1. if I use the pdf file that is in the same folder as the html and css files, a filled pdf is offered for download.

...but the following doesn't work:

  1. if I use the same URL that is generated by getPDFfileURL(), nothing happens and no filled pdf is offered for download.

So the question is:
How do I generate the correct URL (for the pdf file stored in Google Drive), so it can then be used by PDF-lib.js (in the htmlTemplate)?

stm
  • 662
  • 1
  • 6
  • 23
David Fear
  • 21
  • 5
  • The pdf should be shared publicly. Otherwise, Google probably won't provide you the download, even though you have the url. Try one of the irl you get in a incognito tab and see what is downloaded. – TheMaster Jul 17 '22 at 15:55
  • You don't show your templeted HTML so it is hard to see what you are doing with `{ pdf_template_file: "form1.pdf", pdf_template_file_url: pdf_template_file_url }`. Are you sure you are passing the correct parameters to PDF-lib? I have not used PDF-lib so not sure of the work process there. – TheWizEd Jul 17 '22 at 15:55
  • @TheWizEd - I have update the question with the contents of the htmlTemplate. – David Fear Jul 17 '22 at 16:43
  • @TheMaster - The pdf *should be* accessed by the same user that runs the google apps script. The pdf is not supposed to be publicly accessible. If you want a pdf to fill in; use the one provided in the jsFiddle example. – David Fear Jul 17 '22 at 16:47
  • When you say "nothing happens and no filled pdf is offered for download" do you mean `initialize` isn't run so that you don't see the file name? – TheWizEd Jul 17 '22 at 17:02
  • 1
    The error suggests that the fetched document isn't pdf. It's probably a html page redirect to Google login or something. You say they are the same user. But how would Google know that they are the same? – TheMaster Jul 17 '22 at 18:24
  • @TheMaster - You are probably correct. Whatever is being stored in "formPdfBytes", is not a pdf document. – David Fear Jul 17 '22 at 18:44
  • @TheMaster - modal dialog box was created by an authenticated user, so the html/javascript/css in the htmlOutput object should be run/displayed/fetched by the same authenticated user; is this not true? – David Fear Jul 17 '22 at 18:48
  • @TheWizEd - "When you say "nothing happens and no filled pdf is offered for download" do you mean initialize isn't run so that you don't see the file name? " `initialize` runs just fine. It's the `const formPdfBytes = await fetch(formUrl, {` line inside `fillForm()` that is failing. The URL stored `formUrl` just doesn't result in a pdf document when fetched. – David Fear Jul 17 '22 at 18:53
  • That's not true. At bare minimum, you need to `include` cookies in your `fetch` call. As this is cross origin request, I expect it to fail anyway. Why don't you test out my hypothesis by sharing a pdf public and trying out your script using that pdf? – TheMaster Jul 17 '22 at 19:26
  • @TheMaster - "That's not true. At bare minimum, you need to include cookies in your fetch call. As this is cross origin request, I expect it to fail anyway. Why don't you test out my hypothesis by sharing a pdf public and trying out your script using that pdf?" just made the pdf publicly accessible. I re-ran the GAS script and got the same result. – David Fear Jul 17 '22 at 19:52
  • @TheMaster - The problem seems to stem from: `Access to fetch at 'https://drive.google.com/uc?id=1y8F5NgnK50JhlWSR6v1b8pELsddBJhvu&export=download' from origin 'https://n-w6vwh325krlfuvo547or2utiltyscz4fytrwjmy-0lu-script.googleusercontent.com' has been blocked by CORS policy: Response to preflight request doesn't pass access control check: No 'Access-Control-Allow-Origin' header is present on the requested resource. If an opaque response serves your needs, set the request's mode to 'no-cors' to fetch the resource with CORS disabled.` This is the result of setting `mode: 'cors'` in `fetch()`. – David Fear Jul 17 '22 at 19:52
  • 1
    If it's a small file, i'd download the file server side using `ScriptApp.getOAuthToken()`( access token) as a byte array and send the data client side, where it can be accessed as a buffer. – TheMaster Jul 17 '22 at 19:59
  • @TheMaster setting `mode: 'no-cors'` in `fetch()` results in the `fetch()` semi working. it appears that the redirect is being stored in `formPdfBytes`. This is the javascript in question: ` const formPdfBytes = await fetch(formUrl, { method: 'GET', mode: 'no-cors', credentials: 'include', headers: { 'Content-Type': 'application/pdf', }, redirect: "follow" }).then(res => res.arrayBuffer()); // Load a PDF with form fields const pdfDoc = await PDFDocument.load(formPdfBytes);` – David Fear Jul 17 '22 at 20:03
  • `mode: 'no-cors'` results in redirect being stored and `mode: 'cors'` results in `cors` error. – David Fear Jul 17 '22 at 20:03
  • @TheMaster "f it's a small file, i'd download the file server side using ScriptApp.getOAuthToken()( access token) as a byte array and send the data client side, where it can be accessed as a buffer." My thoughts for a solution: 1) Generate the correct URL in GAS. 2) Do what you just suggested. – David Fear Jul 17 '22 at 20:07
  • 1
    @TheMaster - this [post](https://stackoverflow.com/questions/60181888/google-apps-script-get-range-of-bytes-from-binary-file) seems to have the answer: – David Fear Jul 17 '22 at 20:14
  • If you have a specific answer, you can post it below. See [answer] – TheMaster Jul 18 '22 at 09:01

1 Answers1

1

The answer is:

  1. fetch the pdf file from Google drive as raw bytes.
  2. encode the bytes as base64.
  3. pass the base64 string from the GAS function to the htmlTemplate.

This is the function that gets the pdf file and returns it as a base64 encoded string:

function getPDFfileAsBase64() {
  var fileId = "";
  var url = "https://drive.google.com/uc?id=" + fileId + "&alt=media";
  console.log("url: " + url);

  var params = {
    method: "get",
    headers: {
      Authorization: "Bearer " + ScriptApp.getOAuthToken(),
    },
  };
  var bytes = UrlFetchApp.fetch(url, params).getContent();
  var encoded = Utilities.base64Encode(bytes);
  //console.log("encoded: " + encoded);

  return encoded;
}

the function to create a modal dialog is:

function showModalDialog_downloadFilledPDFform_form1() {
  // Display a modal dialog box with custom HtmlService content.

  var pdf_template_file = "form1.pdf";
  var pdf_template_file_AsBase64 = getPDFfileAsBase64(pdf_template_file);
  var htmlTemplate = HtmlService.createTemplateFromFile('downloadFilledPDFformHTML');
  
  htmlTemplate.dataFromServerTemplate = { pdf_template_file: pdf_template_file,  pdf_template_file_AsBase64: pdf_template_file_AsBase64};
  
  var htmlOutput = htmlTemplate.evaluate();
  htmlOutput.setWidth(648.1);
  htmlOutput.setHeight(286.300)
  SpreadsheetApp.getUi().showModalDialog(htmlOutput, 'Download filled PDF');
} 
David Fear
  • 21
  • 5