Got InstagramRipper working again

This commit is contained in:
cyian-1756 2018-06-19 16:06:39 -04:00
parent 16cfe8a9dd
commit 867af57713

View File

@ -76,6 +76,10 @@ public class InstagramRipper extends AbstractJSONRipper {
return url.replaceAll("/[A-Z0-9]{8}/", "/"); return url.replaceAll("/[A-Z0-9]{8}/", "/");
} }
/**
 * Overrides the inherited {@code hasASAPRipping()} hook and unconditionally
 * reports {@code true} for this ripper.
 *
 * NOTE(review): presumably this makes the base ripper start downloading URLs
 * as they are found rather than after all pages are collected - confirm
 * against the overridden declaration.
 *
 * @return always {@code true}
 */
@Override public boolean hasASAPRipping() {
return true;
}
private List<String> getPostsFromSinglePage(JSONObject json) { private List<String> getPostsFromSinglePage(JSONObject json) {
List<String> imageURLs = new ArrayList<>(); List<String> imageURLs = new ArrayList<>();
JSONArray datas; JSONArray datas;
@ -231,9 +235,21 @@ public class InstagramRipper extends AbstractJSONRipper {
return imageURL; return imageURL;
} }
/**
 * Reads the {@code end_cursor} string from the {@code page_info} object of
 * the {@code edge_owner_to_timeline_media} section of the given JSON.
 *
 * Two document layouts are tried in order:
 * first {@code entry_data.ProfilePage[0].graphql.user}, and, if any lookup
 * along that path raises a {@link JSONException}, {@code data.user}.
 *
 * @param json the JSON document to read the cursor from
 * @return the value stored under {@code end_cursor}
 * @throws JSONException if the fallback {@code data.user} path cannot be
 *         resolved either (the fallback lookup is not guarded)
 */
public String getAfter(JSONObject json) {
    try {
        // Layout rooted at entry_data -> ProfilePage[0] -> graphql -> user.
        JSONObject profilePage = json.getJSONObject("entry_data")
                .getJSONArray("ProfilePage")
                .getJSONObject(0);
        JSONObject profileUser = profilePage.getJSONObject("graphql").getJSONObject("user");
        JSONObject profilePageInfo = profileUser
                .getJSONObject("edge_owner_to_timeline_media")
                .getJSONObject("page_info");
        return profilePageInfo.getString("end_cursor");
    } catch (JSONException e) {
        // Layout rooted at data -> user; any failure here propagates to the caller.
        JSONObject dataUser = json.getJSONObject("data").getJSONObject("user");
        JSONObject dataPageInfo = dataUser
                .getJSONObject("edge_owner_to_timeline_media")
                .getJSONObject("page_info");
        return dataPageInfo.getString("end_cursor");
    }
}
@Override @Override
public List<String> getURLsFromJSON(JSONObject json) { public List<String> getURLsFromJSON(JSONObject json) {
List<String> imageURLs = new ArrayList<>(); List<String> imageURLs = new ArrayList<>();
nextPageID = getAfter(json);
// get the rhx_gis value so we can get the next page later on // get the rhx_gis value so we can get the next page later on
if (rhx_gis == null) { if (rhx_gis == null) {
@ -251,7 +267,7 @@ public class InstagramRipper extends AbstractJSONRipper {
.getJSONObject("edge_owner_to_timeline_media").getJSONArray("edges"); .getJSONObject("edge_owner_to_timeline_media").getJSONArray("edges");
} catch (JSONException e) { } catch (JSONException e) {
datas = json.getJSONObject("data").getJSONObject("user") datas = json.getJSONObject("data").getJSONObject("user")
.getJSONObject("edge_user_to_photos_of_you").getJSONArray("edges"); .getJSONObject("edge_owner_to_timeline_media").getJSONArray("edges");
} }
} else { } else {
try { try {
@ -301,11 +317,10 @@ public class InstagramRipper extends AbstractJSONRipper {
} }
} }
} catch (MalformedURLException e) { } catch (MalformedURLException e) {
LOGGER.info("Got MalformedURLException");
return imageURLs; return imageURLs;
} }
nextPageID = data.getString("id");
if (isThisATest()) { if (isThisATest()) {
break; break;
} }
@ -369,10 +384,11 @@ public class InstagramRipper extends AbstractJSONRipper {
try { try {
// Sleep for a while to avoid a ban // Sleep for a while to avoid a ban
sleep(2500); sleep(2500);
String vars = "{\"id\":\"" + userID + "\",\"first\":50,\"after\":\"" + nextPageID + "\"}"; String vars = "{\"id\":\"" + userID + "\",\"first\":12,\"after\":\"" + nextPageID + "\"}";
String ig_gis = getIGGis(vars); String ig_gis = getIGGis(vars);
LOGGER.info(ig_gis); LOGGER.info(ig_gis);
LOGGER.info("https://www.instagram.com/graphql/query/?query_hash=" + qHash + "&variables=" + vars);
toreturn = getPage("https://www.instagram.com/graphql/query/?query_hash=" + qHash + "&variables=" + vars, ig_gis); toreturn = getPage("https://www.instagram.com/graphql/query/?query_hash=" + qHash + "&variables=" + vars, ig_gis);
if (!pageHasImages(toreturn)) { if (!pageHasImages(toreturn)) {
throw new IOException("No more pages"); throw new IOException("No more pages");
@ -394,7 +410,7 @@ public class InstagramRipper extends AbstractJSONRipper {
private boolean pageHasImages(JSONObject json) { private boolean pageHasImages(JSONObject json) {
LOGGER.info(json); LOGGER.info(json);
int numberOfImages = json.getJSONObject("data").getJSONObject("user") int numberOfImages = json.getJSONObject("data").getJSONObject("user")
.getJSONObject("edge_user_to_photos_of_you").getJSONArray("edges").length(); .getJSONObject("edge_owner_to_timeline_media").getJSONArray("edges").length();
if (numberOfImages == 0) { if (numberOfImages == 0) {
return false; return false;
} }
@ -453,26 +469,11 @@ public class InstagramRipper extends AbstractJSONRipper {
return null; return null;
} }
if (!rippingTag) { if (!rippingTag) {
Pattern jsP = Pattern.compile("o},queryId:.([a-zA-Z0-9]+)."); Pattern jsP = Pattern.compile("m=\"9ca88e465c3f866a76f7adee3871bdd8\",g=Object\\(c.b\\)\\(\\{pageSize:p.a,pagesToPreload:0,getState:function\\(e,t\\)\\{var o;return null===\\(o=e.profilePosts.byUserId.get\\(t\\)\\)\\|\\|void 0===o\\?void 0:o\\.pagination},queryId:.([a-zA-Z0-9]+)");
Matcher m = jsP.matcher(sb.toString()); Matcher m = jsP.matcher(sb.toString());
if (m.find()) { if (m.find()) {
return m.group(1); return m.group(1);
} }
jsP = Pattern.compile("n.pagination:n},queryId:.([a-zA-Z0-9]+).");
m = jsP.matcher(sb.toString());
if (m.find()) {
return m.group(1);
}
jsP = Pattern.compile("0:n.pagination},queryId:.([a-zA-Z0-9]+).");
m = jsP.matcher(sb.toString());
if (m.find()) {
return m.group(1);
}
jsP = Pattern.compile("o.pagination},queryId:.([a-zA-Z0-9]+).");
m = jsP.matcher(sb.toString());
if (m.find()) {
return m.group(1);
}
} else { } else {
Pattern jsP = Pattern.compile("return e.tagMedia.byTagName.get\\(t\\).pagination},queryId:.([a-zA-Z0-9]+)."); Pattern jsP = Pattern.compile("return e.tagMedia.byTagName.get\\(t\\).pagination},queryId:.([a-zA-Z0-9]+).");