From a3b533922b4d62a8cc5ff421e0b7b9d182adb71d Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Tue, 7 Nov 2017 22:24:32 -0500 Subject: [PATCH] Instagram can now rip from single pages --- .../ripme/ripper/rippers/InstagramRipper.java | 61 ++++++++++++------- 1 file changed, 39 insertions(+), 22 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java index 92cb97a4..6ce96e10 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java @@ -15,6 +15,11 @@ import org.json.JSONObject; import com.rarchives.ripme.ripper.AbstractJSONRipper; import com.rarchives.ripme.utils.Http; +import org.jsoup.Connection.Response; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; + public class InstagramRipper extends AbstractJSONRipper { private String userID; @@ -73,11 +78,21 @@ public class InstagramRipper extends AbstractJSONRipper { public JSONObject getFirstPage() throws IOException { userID = getUserID(url); - String baseURL = "http://instagram.com/" + userID + "/media"; + String jsonText = ""; try { - return Http.url(baseURL).getJSON(); + Document firstPage = Http.url("http://instagram.com/" + userID).get(); + for (Element script : firstPage.select("script[type=text/javascript]")) { + logger.info("Found script"); + + if (script.data().contains("window._sharedData = ")) { + jsonText = script.data().replaceAll("window._sharedData = ", ""); + jsonText = jsonText.replaceAll("};", "}"); + } + } + logger.debug(jsonText); + return new JSONObject(jsonText); } catch (JSONException e) { - throw new IOException("Could not get instagram user via: " + baseURL); + throw new IOException("Could not get instagram user"); } } @@ -152,28 +167,30 @@ public class InstagramRipper extends AbstractJSONRipper { @Override public List getURLsFromJSON(JSONObject json) { List imageURLs = new ArrayList<>(); - JSONArray datas = json.getJSONArray("items"); + JSONArray profilePage = json.getJSONObject("entry_data").getJSONArray("ProfilePage"); + JSONArray datas = profilePage.getJSONObject(0).getJSONObject("user").getJSONObject("media").getJSONArray("nodes"); for (int i = 0; i < datas.length(); i++) { JSONObject data = (JSONObject) datas.get(i); + imageURLs.add(getOriginalUrl(data.getString("thumbnail_src"))); - String dataType = data.getString("type"); - if (dataType.equals("carousel")) { - JSONArray carouselMedias = data.getJSONArray("carousel_media"); - for (int carouselIndex = 0; carouselIndex < carouselMedias.length(); carouselIndex++) { - JSONObject carouselMedia = (JSONObject) carouselMedias.get(carouselIndex); - String imageURL = getMedia(carouselMedia); - if (!imageURL.equals("")) { - imageURL = getOriginalUrl(imageURL); - imageURLs.add(imageURL); - } - } - } else { - String imageURL = getMedia(data); - if (!imageURL.equals("")) { - imageURL = getOriginalUrl(imageURL); - imageURLs.add(imageURL); - } - } +// String dataType = data.getString("type"); +// if (dataType.equals("carousel")) { +// JSONArray carouselMedias = data.getJSONArray("carousel_media"); +// for (int carouselIndex = 0; carouselIndex < carouselMedias.length(); carouselIndex++) { +// JSONObject carouselMedia = (JSONObject) carouselMedias.get(carouselIndex); +// String imageURL = getMedia(carouselMedia); +// if (!imageURL.equals("")) { +// imageURL = getOriginalUrl(imageURL); +// imageURLs.add(imageURL); +// } +// } +// } else { +// String imageURL = getMedia(data); +// if (!imageURL.equals("")) { +// imageURL = getOriginalUrl(imageURL); +// imageURLs.add(imageURL); +// } +// } if (isThisATest()) { break;