IG ripper can now rip images from all pages

This commit is contained in:
cyian-1756 2017-11-07 23:05:08 -05:00
parent a3b533922b
commit dbdedd7d5a

View File

@ -73,14 +73,10 @@ public class InstagramRipper extends AbstractJSONRipper {
throw new IOException("Unable to find userID at " + this.url); throw new IOException("Unable to find userID at " + this.url);
} }
private JSONObject getJSONFromPage(String url) throws IOException {
@Override
public JSONObject getFirstPage() throws IOException {
userID = getUserID(url);
String jsonText = ""; String jsonText = "";
try { try {
Document firstPage = Http.url("http://instagram.com/" + userID).get(); Document firstPage = Http.url(url).get();
for (Element script : firstPage.select("script[type=text/javascript]")) { for (Element script : firstPage.select("script[type=text/javascript]")) {
logger.info("Found script"); logger.info("Found script");
@ -89,37 +85,42 @@ public class InstagramRipper extends AbstractJSONRipper {
jsonText = jsonText.replaceAll("};", "}"); jsonText = jsonText.replaceAll("};", "}");
} }
} }
logger.debug(jsonText);
return new JSONObject(jsonText); return new JSONObject(jsonText);
} catch (JSONException e) { } catch (JSONException e) {
throw new IOException("Could not get instagram user"); throw new IOException("Could not get JSON from page " + url);
} }
} }
@Override @Override
public JSONObject getNextPage(JSONObject json) throws IOException { public JSONObject getFirstPage() throws IOException {
userID = getUserID(url);
boolean nextPageAvailable; return getJSONFromPage("http://instagram.com/" + userID);
try {
nextPageAvailable = json.getBoolean("more_available");
} catch (Exception e) {
throw new IOException("No additional pages found");
} }
if (nextPageAvailable) { // @Override
JSONArray items = json.getJSONArray("items"); // public JSONObject getNextPage(JSONObject json) throws IOException {
JSONObject last_item = items.getJSONObject(items.length() - 1); //
String nextMaxID = last_item.getString("id"); // boolean nextPageAvailable;
// try {
String baseURL = "http://instagram.com/" + userID + "/media/?max_id=" + nextMaxID; // nextPageAvailable = json.getBoolean("more_available");
logger.info("Loading " + baseURL); // } catch (Exception e) {
sleep(1000); // throw new IOException("No additional pages found");
// }
return Http.url(baseURL).getJSON(); //
} else { // if (nextPageAvailable) {
throw new IOException("No more images found"); // JSONArray items = json.getJSONArray("items");
} // JSONObject last_item = items.getJSONObject(items.length() - 1);
} // String nextMaxID = last_item.getString("id");
//
// String baseURL = "http://instagram.com/" + userID + "/?max_id=" + nextMaxID;
// logger.info("Loading " + baseURL);
// sleep(1000);
//
// return Http.url(baseURL).getJSON();
// } else {
// throw new IOException("No more images found");
// }
// }
private String getOriginalUrl(String imageURL) { private String getOriginalUrl(String imageURL) {
imageURL = imageURL.replaceAll("scontent.cdninstagram.com/hphotos-", "igcdn-photos-d-a.akamaihd.net/hphotos-ak-"); imageURL = imageURL.replaceAll("scontent.cdninstagram.com/hphotos-", "igcdn-photos-d-a.akamaihd.net/hphotos-ak-");
@ -166,12 +167,18 @@ public class InstagramRipper extends AbstractJSONRipper {
@Override @Override
public List<String> getURLsFromJSON(JSONObject json) { public List<String> getURLsFromJSON(JSONObject json) {
String nextPageID = "";
List<String> imageURLs = new ArrayList<>(); List<String> imageURLs = new ArrayList<>();
JSONArray profilePage = json.getJSONObject("entry_data").getJSONArray("ProfilePage"); JSONArray profilePage = json.getJSONObject("entry_data").getJSONArray("ProfilePage");
JSONArray datas = profilePage.getJSONObject(0).getJSONObject("user").getJSONObject("media").getJSONArray("nodes"); JSONArray datas = profilePage.getJSONObject(0).getJSONObject("user").getJSONObject("media").getJSONArray("nodes");
for (int i = 0; i < datas.length(); i++) { for (int i = 0; i < datas.length(); i++) {
JSONObject data = (JSONObject) datas.get(i); JSONObject data = (JSONObject) datas.get(i);
imageURLs.add(getOriginalUrl(data.getString("thumbnail_src"))); try {
addURLToDownload(new URL(getOriginalUrl(data.getString("thumbnail_src"))));
} catch (MalformedURLException e) {
return imageURLs;
}
nextPageID = data.getString("id");
// String dataType = data.getString("type"); // String dataType = data.getString("type");
// if (dataType.equals("carousel")) { // if (dataType.equals("carousel")) {
@ -196,6 +203,12 @@ public class InstagramRipper extends AbstractJSONRipper {
break; break;
} }
} }
if (!nextPageID.equals("")) {
try {
sleep(1000);
getURLsFromJSON(getJSONFromPage("https://www.instagram.com/annabellpeaksxx/?max_id=" + nextPageID));
} catch (IOException e){ return imageURLs;}
}
return imageURLs; return imageURLs;
} }