Merge pull request #459 from cyian-1756/lastseenfix

lastseen feature now works with Instagram
This commit is contained in:
cyian-1756 2018-03-10 12:14:22 -05:00 committed by GitHub
commit 04e7c1e82b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -24,6 +24,7 @@ import com.rarchives.ripme.utils.Utils;
public class InstagramRipper extends AbstractHTMLRipper { public class InstagramRipper extends AbstractHTMLRipper {
String nextPageID = "";
private String userID; private String userID;
@ -198,7 +199,6 @@ public class InstagramRipper extends AbstractHTMLRipper {
@Override @Override
public List<String> getURLsFromPage(Document doc) { public List<String> getURLsFromPage(Document doc) {
String nextPageID = "";
List<String> imageURLs = new ArrayList<>(); List<String> imageURLs = new ArrayList<>();
JSONObject json = new JSONObject(); JSONObject json = new JSONObject();
try { try {
@ -261,33 +261,7 @@ public class InstagramRipper extends AbstractHTMLRipper {
break; break;
} }
} }
// Rip the next page
if (!nextPageID.equals("") && !isThisATest()) {
if (url.toExternalForm().contains("/tags/")) {
try {
// Sleep for a while to avoid a ban
sleep(2500);
if (url.toExternalForm().substring(url.toExternalForm().length() - 1).equals("/")) {
getURLsFromPage(Http.url(url.toExternalForm() + "?max_id=" + nextPageID).get());
} else {
getURLsFromPage(Http.url(url.toExternalForm() + "/?max_id=" + nextPageID).get());
}
} catch (IOException e) {
return imageURLs;
}
}
try {
// Sleep for a while to avoid a ban
sleep(2500);
getURLsFromPage(Http.url("https://www.instagram.com/" + userID + "/?max_id=" + nextPageID).get());
} catch (IOException e) {
return imageURLs;
}
} else {
logger.warn("Can't get net page");
}
} else { // We're ripping from a single page } else { // We're ripping from a single page
logger.info("Ripping from single page"); logger.info("Ripping from single page");
imageURLs = getPostsFromSinglePage(doc); imageURLs = getPostsFromSinglePage(doc);
@ -296,9 +270,65 @@ public class InstagramRipper extends AbstractHTMLRipper {
return imageURLs; return imageURLs;
} }
/**
 * Fetches the next page of results using the pagination cursor held in the
 * {@code nextPageID} field.
 *
 * <p>For URLs containing {@code /tags/} the cursor is appended to the current
 * URL; otherwise the page is requested from the profile of {@code userID}.
 * The method pauses before each request to reduce the chance of a ban.
 *
 * @param doc the page that was just processed; not read by this implementation
 * @return the next page, which is guaranteed to pass {@link #hasImage(Document)}
 * @throws IOException if there is no pagination cursor, a test run is in
 *         progress, the request fails, or the fetched page has no images
 */
@Override
public Document getNextPage(Document doc) throws IOException {
    // No cursor (or a test run) means there is nothing further to fetch.
    if (nextPageID.equals("") || isThisATest()) {
        throw new IOException("No more pages");
    }

    String currentUrl = url.toExternalForm();
    String nextPageUrl;
    if (currentUrl.contains("/tags/")) {
        // Make sure exactly one slash separates the path from the query string.
        String separator = currentUrl.endsWith("/") ? "" : "/";
        nextPageUrl = currentUrl + separator + "?max_id=" + nextPageID;
    } else {
        nextPageUrl = "https://www.instagram.com/" + userID + "/?max_id=" + nextPageID;
    }

    Document nextPage;
    try {
        // Sleep for a while to avoid a ban
        sleep(2500);
        logger.debug("Fetching next page: " + nextPageUrl);
        nextPage = Http.url(nextPageUrl).get();
    } catch (IOException e) {
        // Keep the underlying failure as the cause so it is not lost.
        throw new IOException("No more pages", e);
    }

    // A page without images marks the end of pagination; signal it the same
    // way for both tag and profile URLs.
    if (!hasImage(nextPage)) {
        throw new IOException("No more pages");
    }
    return nextPage;
}
@Override @Override
public void downloadURL(URL url, int index) { public void downloadURL(URL url, int index) {
addURLToDownload(url); addURLToDownload(url);
} }
/**
 * Reports whether the given page's embedded JSON lists at least one media node
 * under {@code entry_data.ProfilePage[0].user.media.nodes}.
 *
 * @param doc the page to inspect
 * @return {@code true} if the node list is non-empty; {@code false} if it is
 *         empty, or if the JSON cannot be read or lacks the expected structure
 */
private boolean hasImage(Document doc) {
    try {
        JSONObject json = getJSONFromPage(doc);
        JSONArray nodes = json.getJSONObject("entry_data")
                .getJSONArray("ProfilePage")
                .getJSONObject(0)
                .getJSONObject("user")
                .getJSONObject("media")
                .getJSONArray("nodes");
        logger.debug("Found " + nodes.length() + " media nodes on page");
        return nodes.length() > 0;
    } catch (IOException | org.json.JSONException e) {
        // The JSON lookups throw the unchecked JSONException when a key is
        // missing; treat an unreadable or unexpected page as having no images
        // instead of letting the exception escape.
        return false;
    }
}
} }