diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractJSONRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractJSONRipper.java index 291dd7df..4455270e 100644 --- a/src/main/java/com/rarchives/ripme/ripper/AbstractJSONRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/AbstractJSONRipper.java @@ -69,7 +69,7 @@ public abstract class AbstractJSONRipper extends AlbumRipper { } } - if (imageURLs.isEmpty()) { + if (imageURLs.isEmpty() && !hasASAPRipping()) { throw new IOException("No images found at " + this.url); } diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java index cb410fb3..3077df94 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java @@ -76,6 +76,10 @@ public class InstagramRipper extends AbstractJSONRipper { return url.replaceAll("/[A-Z0-9]{8}/", "/"); } + @Override public boolean hasASAPRipping() { + return true; + } + private List getPostsFromSinglePage(JSONObject json) { List imageURLs = new ArrayList<>(); JSONArray datas; @@ -231,9 +235,23 @@ public class InstagramRipper extends AbstractJSONRipper { return imageURL; } + public String getAfter(JSONObject json) { + try { + return json.getJSONObject("entry_data").getJSONArray("ProfilePage").getJSONObject(0) + .getJSONObject("graphql").getJSONObject("user") + .getJSONObject("edge_owner_to_timeline_media").getJSONObject("page_info").getString("end_cursor"); + } catch (JSONException e) { + return json.getJSONObject("data").getJSONObject("user") + .getJSONObject("edge_owner_to_timeline_media").getJSONObject("page_info").getString("end_cursor"); + } + } + @Override public List getURLsFromJSON(JSONObject json) { List imageURLs = new ArrayList<>(); + if (!url.toExternalForm().contains("/p/")) { + nextPageID = getAfter(json); + } // get the rhx_gis value so we can get the next page later on if (rhx_gis == null) { @@ -251,7 +269,7 @@ public class InstagramRipper extends AbstractJSONRipper { .getJSONObject("edge_owner_to_timeline_media").getJSONArray("edges"); } catch (JSONException e) { datas = json.getJSONObject("data").getJSONObject("user") - .getJSONObject("edge_user_to_photos_of_you").getJSONArray("edges"); + .getJSONObject("edge_owner_to_timeline_media").getJSONArray("edges"); } } else { try { @@ -301,11 +319,10 @@ public class InstagramRipper extends AbstractJSONRipper { } } } catch (MalformedURLException e) { + LOGGER.info("Got MalformedURLException"); return imageURLs; } - nextPageID = data.getString("id"); - if (isThisATest()) { break; } @@ -369,10 +386,11 @@ public class InstagramRipper extends AbstractJSONRipper { try { // Sleep for a while to avoid a ban sleep(2500); - String vars = "{\"id\":\"" + userID + "\",\"first\":50,\"after\":\"" + nextPageID + "\"}"; + String vars = "{\"id\":\"" + userID + "\",\"first\":12,\"after\":\"" + nextPageID + "\"}"; String ig_gis = getIGGis(vars); LOGGER.info(ig_gis); + LOGGER.info("https://www.instagram.com/graphql/query/?query_hash=" + qHash + "&variables=" + vars); toreturn = getPage("https://www.instagram.com/graphql/query/?query_hash=" + qHash + "&variables=" + vars, ig_gis); if (!pageHasImages(toreturn)) { throw new IOException("No more pages"); @@ -394,7 +412,7 @@ public class InstagramRipper extends AbstractJSONRipper { private boolean pageHasImages(JSONObject json) { LOGGER.info(json); int numberOfImages = json.getJSONObject("data").getJSONObject("user") - .getJSONObject("edge_user_to_photos_of_you").getJSONArray("edges").length(); + .getJSONObject("edge_owner_to_timeline_media").getJSONArray("edges").length(); if (numberOfImages == 0) { return false; } @@ -453,26 +471,11 @@ public class InstagramRipper extends AbstractJSONRipper { return null; } if (!rippingTag) { - Pattern jsP = Pattern.compile("o},queryId:.([a-zA-Z0-9]+)."); + Pattern jsP = Pattern.compile("m=\"9ca88e465c3f866a76f7adee3871bdd8\",g=Object\\(c.b\\)\\(\\{pageSize:p.a,pagesToPreload:0,getState:function\\(e,t\\)\\{var o;return null===\\(o=e.profilePosts.byUserId.get\\(t\\)\\)\\|\\|void 0===o\\?void 0:o\\.pagination},queryId:.([a-zA-Z0-9]+)"); Matcher m = jsP.matcher(sb.toString()); if (m.find()) { return m.group(1); } - jsP = Pattern.compile("n.pagination:n},queryId:.([a-zA-Z0-9]+)."); - m = jsP.matcher(sb.toString()); - if (m.find()) { - return m.group(1); - } - jsP = Pattern.compile("0:n.pagination},queryId:.([a-zA-Z0-9]+)."); - m = jsP.matcher(sb.toString()); - if (m.find()) { - return m.group(1); - } - jsP = Pattern.compile("o.pagination},queryId:.([a-zA-Z0-9]+)."); - m = jsP.matcher(sb.toString()); - if (m.find()) { - return m.group(1); - } } else { Pattern jsP = Pattern.compile("return e.tagMedia.byTagName.get\\(t\\).pagination},queryId:.([a-zA-Z0-9]+).");