IG ripper can now rip images from all pages
This commit is contained in:
parent
a3b533922b
commit
dbdedd7d5a
@ -73,14 +73,10 @@ public class InstagramRipper extends AbstractJSONRipper {
|
|||||||
|
|
||||||
throw new IOException("Unable to find userID at " + this.url);
|
throw new IOException("Unable to find userID at " + this.url);
|
||||||
}
|
}
|
||||||
|
private JSONObject getJSONFromPage(String url) throws IOException {
|
||||||
@Override
|
|
||||||
public JSONObject getFirstPage() throws IOException {
|
|
||||||
userID = getUserID(url);
|
|
||||||
|
|
||||||
String jsonText = "";
|
String jsonText = "";
|
||||||
try {
|
try {
|
||||||
Document firstPage = Http.url("http://instagram.com/" + userID).get();
|
Document firstPage = Http.url(url).get();
|
||||||
for (Element script : firstPage.select("script[type=text/javascript]")) {
|
for (Element script : firstPage.select("script[type=text/javascript]")) {
|
||||||
logger.info("Found script");
|
logger.info("Found script");
|
||||||
|
|
||||||
@ -89,37 +85,42 @@ public class InstagramRipper extends AbstractJSONRipper {
|
|||||||
jsonText = jsonText.replaceAll("};", "}");
|
jsonText = jsonText.replaceAll("};", "}");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
logger.debug(jsonText);
|
|
||||||
return new JSONObject(jsonText);
|
return new JSONObject(jsonText);
|
||||||
} catch (JSONException e) {
|
} catch (JSONException e) {
|
||||||
throw new IOException("Could not get instagram user");
|
throw new IOException("Could not get JSON from page " + url);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public JSONObject getNextPage(JSONObject json) throws IOException {
|
public JSONObject getFirstPage() throws IOException {
|
||||||
|
userID = getUserID(url);
|
||||||
boolean nextPageAvailable;
|
return getJSONFromPage("http://instagram.com/" + userID);
|
||||||
try {
|
|
||||||
nextPageAvailable = json.getBoolean("more_available");
|
|
||||||
} catch (Exception e) {
|
|
||||||
throw new IOException("No additional pages found");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (nextPageAvailable) {
|
// @Override
|
||||||
JSONArray items = json.getJSONArray("items");
|
// public JSONObject getNextPage(JSONObject json) throws IOException {
|
||||||
JSONObject last_item = items.getJSONObject(items.length() - 1);
|
//
|
||||||
String nextMaxID = last_item.getString("id");
|
// boolean nextPageAvailable;
|
||||||
|
// try {
|
||||||
String baseURL = "http://instagram.com/" + userID + "/media/?max_id=" + nextMaxID;
|
// nextPageAvailable = json.getBoolean("more_available");
|
||||||
logger.info("Loading " + baseURL);
|
// } catch (Exception e) {
|
||||||
sleep(1000);
|
// throw new IOException("No additional pages found");
|
||||||
|
// }
|
||||||
return Http.url(baseURL).getJSON();
|
//
|
||||||
} else {
|
// if (nextPageAvailable) {
|
||||||
throw new IOException("No more images found");
|
// JSONArray items = json.getJSONArray("items");
|
||||||
}
|
// JSONObject last_item = items.getJSONObject(items.length() - 1);
|
||||||
}
|
// String nextMaxID = last_item.getString("id");
|
||||||
|
//
|
||||||
|
// String baseURL = "http://instagram.com/" + userID + "/?max_id=" + nextMaxID;
|
||||||
|
// logger.info("Loading " + baseURL);
|
||||||
|
// sleep(1000);
|
||||||
|
//
|
||||||
|
// return Http.url(baseURL).getJSON();
|
||||||
|
// } else {
|
||||||
|
// throw new IOException("No more images found");
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
|
||||||
private String getOriginalUrl(String imageURL) {
|
private String getOriginalUrl(String imageURL) {
|
||||||
imageURL = imageURL.replaceAll("scontent.cdninstagram.com/hphotos-", "igcdn-photos-d-a.akamaihd.net/hphotos-ak-");
|
imageURL = imageURL.replaceAll("scontent.cdninstagram.com/hphotos-", "igcdn-photos-d-a.akamaihd.net/hphotos-ak-");
|
||||||
@ -166,12 +167,18 @@ public class InstagramRipper extends AbstractJSONRipper {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public List<String> getURLsFromJSON(JSONObject json) {
|
public List<String> getURLsFromJSON(JSONObject json) {
|
||||||
|
String nextPageID = "";
|
||||||
List<String> imageURLs = new ArrayList<>();
|
List<String> imageURLs = new ArrayList<>();
|
||||||
JSONArray profilePage = json.getJSONObject("entry_data").getJSONArray("ProfilePage");
|
JSONArray profilePage = json.getJSONObject("entry_data").getJSONArray("ProfilePage");
|
||||||
JSONArray datas = profilePage.getJSONObject(0).getJSONObject("user").getJSONObject("media").getJSONArray("nodes");
|
JSONArray datas = profilePage.getJSONObject(0).getJSONObject("user").getJSONObject("media").getJSONArray("nodes");
|
||||||
for (int i = 0; i < datas.length(); i++) {
|
for (int i = 0; i < datas.length(); i++) {
|
||||||
JSONObject data = (JSONObject) datas.get(i);
|
JSONObject data = (JSONObject) datas.get(i);
|
||||||
imageURLs.add(getOriginalUrl(data.getString("thumbnail_src")));
|
try {
|
||||||
|
addURLToDownload(new URL(getOriginalUrl(data.getString("thumbnail_src"))));
|
||||||
|
} catch (MalformedURLException e) {
|
||||||
|
return imageURLs;
|
||||||
|
}
|
||||||
|
nextPageID = data.getString("id");
|
||||||
|
|
||||||
// String dataType = data.getString("type");
|
// String dataType = data.getString("type");
|
||||||
// if (dataType.equals("carousel")) {
|
// if (dataType.equals("carousel")) {
|
||||||
@ -196,6 +203,12 @@ public class InstagramRipper extends AbstractJSONRipper {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (!nextPageID.equals("")) {
|
||||||
|
try {
|
||||||
|
sleep(1000);
|
||||||
|
getURLsFromJSON(getJSONFromPage("https://www.instagram.com/annabellpeaksxx/?max_id=" + nextPageID));
|
||||||
|
} catch (IOException e){ return imageURLs;}
|
||||||
|
}
|
||||||
return imageURLs;
|
return imageURLs;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user