Instagram ripper no longer errors out on the last page

This commit is contained in:
cyian-1756 2018-03-09 18:06:01 -05:00
parent 4af4691893
commit ecf7a4b623

View File

@ -271,34 +271,43 @@ public class InstagramRipper extends AbstractHTMLRipper {
} }
/**
 * Fetches the page that follows the current one, passing {@code nextPageID}
 * as the {@code max_id} query parameter.
 *
 * NOTE(review): every line of this block appears to hold two renderings of the
 * same statement (before and after the change, as in a side-by-side diff);
 * lines holding a single statement exist on one side only. The notes below
 * describe the right-hand (post-change) text - confirm against the real file.
 *
 * Post-change behavior as written here:
 * - When {@code nextPageID} is empty or {@code isThisATest()} is true, an
 *   IOException("No more pages") is thrown.
 * - URLs containing "/tags/": after {@code sleep(2500)}, the URL is requested
 *   with "?max_id=" + nextPageID (a "/" is inserted first when the URL does
 *   not already end with one), the whole page HTML is logged at info level,
 *   and IOException("No more pages") is thrown when {@code hasImage} reports
 *   false. Any IOException raised in this branch is replaced by a new
 *   IOException("No more pages") that does not carry the original cause.
 * - All other URLs: after {@code sleep(2500)}, the page at
 *   "https://www.instagram.com/" + userID + "/?max_id=" + nextPageID is
 *   requested.
 *   NOTE(review): the "No more pages" exception in this branch is thrown
 *   inside the try whose catch returns null, so this branch ends with null
 *   rather than an exception - confirm whether that is intended.
 *
 * @param doc the current page; it is not read by the code shown here
 * @return the next page, or null when the non-tag request path hits an IOException
 * @throws IOException with the message "No more pages" in the cases listed above
 */
@Override @Override
public Document getNextPage(Document doc) { public Document getNextPage(Document doc) throws IOException {
Document toreturn;
if (!nextPageID.equals("") && !isThisATest()) { if (!nextPageID.equals("") && !isThisATest()) {
if (url.toExternalForm().contains("/tags/")) { if (url.toExternalForm().contains("/tags/")) {
try { try {
// Sleep for a while to avoid a ban // Sleep for a while to avoid a ban
sleep(2500); sleep(2500);
if (url.toExternalForm().substring(url.toExternalForm().length() - 1).equals("/")) { if (url.toExternalForm().substring(url.toExternalForm().length() - 1).equals("/")) {
return Http.url(url.toExternalForm() + "?max_id=" + nextPageID).get(); toreturn = Http.url(url.toExternalForm() + "?max_id=" + nextPageID).get();
} else { } else {
return Http.url(url.toExternalForm() + "/?max_id=" + nextPageID).get(); toreturn = Http.url(url.toExternalForm() + "/?max_id=" + nextPageID).get();
} }
logger.info(toreturn.html());
if (!hasImage(toreturn)) {
throw new IOException("No more pages");
}
return toreturn;
} catch (IOException e) { } catch (IOException e) {
return null; throw new IOException("No more pages");
} }
} }
try { try {
// Sleep for a while to avoid a ban // Sleep for a while to avoid a ban
sleep(2500); sleep(2500);
return Http.url("https://www.instagram.com/" + userID + "/?max_id=" + nextPageID).get(); toreturn = Http.url("https://www.instagram.com/" + userID + "/?max_id=" + nextPageID).get();
if (!hasImage(toreturn)) {
throw new IOException("No more pages");
}
return toreturn;
} catch (IOException e) { } catch (IOException e) {
return null; return null;
} }
} else { } else {
logger.warn("Can't get net page"); throw new IOException("No more pages");
} }
return null;
} }
@Override @Override
@ -306,4 +315,20 @@ public class InstagramRipper extends AbstractHTMLRipper {
addURLToDownload(url); addURLToDownload(url);
} }
/**
 * Reports whether the JSON extracted from {@code doc} lists at least one
 * entry under entry_data.ProfilePage[0].user.media.nodes.
 *
 * The number of entries found is written to the log at info level.
 *
 * @param doc the page whose JSON is read through {@code getJSONFromPage}
 * @return true when the nodes array is non-empty; false when it is empty or
 *         when reading the JSON raises an IOException
 */
private boolean hasImage(Document doc) {
    final JSONArray nodes;
    try {
        // Walk straight down to the media node list of the first profile entry.
        nodes = getJSONFromPage(doc)
                .getJSONObject("entry_data")
                .getJSONArray("ProfilePage")
                .getJSONObject(0)
                .getJSONObject("user")
                .getJSONObject("media")
                .getJSONArray("nodes");
    } catch (IOException e) {
        return false;
    }
    int nodeCount = nodes.length();
    logger.info(nodeCount);
    return nodeCount != 0;
}
} }