From ecf7a4b623605cb071f0e8299c9d51f09461e716 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Fri, 9 Mar 2018 18:06:01 -0500 Subject: [PATCH] IG ripper now no longer errors out on last page --- .../ripme/ripper/rippers/InstagramRipper.java | 39 +++++++++++++++---- 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java index 364b645c..ab44edfd 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java @@ -271,34 +271,43 @@ public class InstagramRipper extends AbstractHTMLRipper { } @Override - public Document getNextPage(Document doc) { + public Document getNextPage(Document doc) throws IOException { + Document toreturn; if (!nextPageID.equals("") && !isThisATest()) { if (url.toExternalForm().contains("/tags/")) { try { // Sleep for a while to avoid a ban sleep(2500); if (url.toExternalForm().substring(url.toExternalForm().length() - 1).equals("/")) { - return Http.url(url.toExternalForm() + "?max_id=" + nextPageID).get(); + toreturn = Http.url(url.toExternalForm() + "?max_id=" + nextPageID).get(); } else { - return Http.url(url.toExternalForm() + "/?max_id=" + nextPageID).get(); + toreturn = Http.url(url.toExternalForm() + "/?max_id=" + nextPageID).get(); } + logger.info(toreturn.html()); + if (!hasImage(toreturn)) { + throw new IOException("No more pages"); + } + return toreturn; } catch (IOException e) { - return null; + throw new IOException("No more pages"); } } try { // Sleep for a while to avoid a ban sleep(2500); - return Http.url("https://www.instagram.com/" + userID + "/?max_id=" + nextPageID).get(); + toreturn = Http.url("https://www.instagram.com/" + userID + "/?max_id=" + nextPageID).get(); + if (!hasImage(toreturn)) { + throw new IOException("No more pages"); + } + return toreturn; } catch (IOException e) { return null; } } else { - logger.warn("Can't get net page"); + throw new IOException("No more pages"); } - return null; } @Override @@ -306,4 +315,20 @@ public class InstagramRipper extends AbstractHTMLRipper { addURLToDownload(url); } + private boolean hasImage(Document doc) { + try { + JSONObject json = getJSONFromPage(doc); + JSONArray profilePage = json.getJSONObject("entry_data").getJSONArray("ProfilePage"); + JSONArray datas = profilePage.getJSONObject(0).getJSONObject("user").getJSONObject("media").getJSONArray("nodes"); + logger.info(datas.length()); + if (datas.length() == 0) { + return false; + } + return true; + } catch (IOException e) { + return false; + } + + } + }