1

I have an working script which looks kind of...

var page = require('webpage').create();

page.onConsoleMessage = function(msg) {
    console.log(msg);
};


page.open("http://www.any_website.com", function(status) {
    if ( status == "success" ) {
        page.evaluate(function() {
              document.querySelector("input[name='MAIL_ADDRESS']").value = "any@mail.com";
              document.querySelector("input[name='PASSWORD']").value = "the_real_password";
              document.getElementsByName("LOGIN_FORM_SUBMIT")[0].click();
              console.log("Login submitted!");
        });
        window.setTimeout(function () {
            var ua = page.evaluate(function () {
                return document.getElementById('ContentMain').innerHTML;
            });
            console.log(ua);
            phantom.exit();
        }, 20000);
   }
});

As far as good.

But as you might see, I have implemented a fix timeout with 20 seconds after the click on the login button. I want to get rid of this and I want the script to close immediately after the login was done. I was playing around for months now but I wasn't able to find a solution without timing constraints, which would be far more elegant and efficient and robust.

Can somebody help with the adaptation of the code?

thanks

PS: More infos about the functionality of javascript + phantomjs are welcome. I'm not really knowing what I'm doing here and I don't know if the second page.evaluate call makes sense.

PPS: Is there a delay function which waits until the site was fully loaded?

Edit 1:

Thank you for the comments. I can precise "fully loaded" to say that a defined string shall appear in the data. I tried a different approach in looping with setInterval and looking for a specific string in the html data.

This new code isn't working because the script hangs after step 1. I think when I readout the page.content value, the whole phantomjs processing stopps and I wont get page.content to early it won't get the latest data after the login at any time.

The plan was just to poll the html data as long as I find a specific string which I know will appear when the site is loaded.

When I rise the interval to 5000 or higher it can be that the script works because the page.content was called after the final data appeared?! (not sure but that's my explain)

Any idea how to poll the html data without breaking/stopping the site download/processing?

if (!String.prototype.includes) {
  String.prototype.includes = function(search, start) {
    'use strict';
    if (typeof start !== 'number') {
      start = 0;
    }

    if (start + search.length > this.length) {
      return false;
    } else {
      return this.indexOf(search, start) !== -1;
    }
  };
}

var page = require('webpage').create(), testindex = 0, loadInProgress = false, delayedLoad = false;

page.onConsoleMessage = function(msg) {
    console.log(msg);
};

page.onLoadStarted = function() {
  loadInProgress = true;
  console.log("load started");
};

page.onLoadFinished = function() {
  loadInProgress = false;
  console.log("load finished");
};

var steps = [
  function() {
    //Load Login Page
    page.open("http://www.any_website.com");
  },
  function() {
    //Enter Credentials and login
    page.evaluate(function() {
      document.querySelector("input[name='MAIL_ADDRESS']").value = "real_name";
      document.querySelector("input[name='PASSWORD']").value = "real_password";
      document.getElementsByName("LOGIN_FORM_SUBMIT")[0].click();
    });
  }, 
  function() {
    // Output content of page to stdout after form has been submitted
    page.render('out.png');
    page.evaluate(function() {
      console.log(document.getElementById('ContentMain').innerHTML);
    });
  }
];

// this is for signalizing phantomjs when all the data has finished loading
var stepstop = [ "", "Stop Text at the End of the needed Data", ""];



interval = setInterval(function() {
  if (!loadInProgress && typeof steps[testindex] == "function") {
    if (delayedLoad == false) {
      console.log("step " + testindex);
      steps[testindex]();
    }

    if (stepstop[testindex] != "") {
      var tempHTML = page.content;
          // console.log("b " + tempHTML.length);
          console.log("c " + stepstop[testindex]);
          // console.log("d " + tempHTML);
          console.log("e " + tempHTML.includes(stepstop[testindex]));
      if (tempHTML.includes(stepstop[testindex]) != false) {
        console.log("step " + testindex + ": HTML stop found");
        delayedLoad = false;
        testindex++;
      } else {
        console.log("step " + testindex + ": HTML stop not found");
        delayedLoad = true;
      }
    } else {
      console.log("step " + testindex + ": no HTML stop search needed");
      testindex++;
    }
  }

  if (typeof steps[testindex] != "function") {
    console.log("shutdown phantom");
    phantom.exit();
  }
}, 100);
fpdragon
  • 1,867
  • 4
  • 25
  • 36
  • 1
    Look over here: http://stackoverflow.com/questions/9246438/how-to-submit-a-form-using-phantomjs - This addresses your problem, if I'm not mistaken. – Tomalak Feb 22 '16 at 17:33
  • Possible duplicate of [phantomjs not waiting for "full" page load](http://stackoverflow.com/questions/11340038/phantomjs-not-waiting-for-full-page-load) – Artjom B. Feb 22 '16 at 19:26
  • These two approaches are the best: [Checking for an outstanding network activity](http://stackoverflow.com/a/21401636/1816580) and [Checking that all requests are finished](http://stackoverflow.com/a/14748934/1816580). Of course you can always re-use `waitFor` to wait for a specific selector that signifies that a page is fully loaded. – Artjom B. Feb 22 '16 at 19:28
  • Besides, how do you define "fully loaded"? – Artjom B. Feb 22 '16 at 19:29
  • 1
    Or just use setInterval instead of setTimeout and check for some condition – pguardiario Feb 23 '16 at 06:53

1 Answers1

0

Ok... At last I found a solution...

I completely switched from phantomjs to Selenium + Webdriver (Chrome Browser) + C# API.

This works far more better for me and it allows to implement more complex mechanisms to find a user defined "load finished" criteria.

Maybe it was just me, but with PhantomJS and JavaScript I didn't managed to come to a solution.

fpdragon
  • 1,867
  • 4
  • 25
  • 36