Instagram can now rip from single pages

This commit is contained in:
cyian-1756 2017-11-07 22:24:32 -05:00
parent 14130580d7
commit a3b533922b

View File

@ -15,6 +15,11 @@ import org.json.JSONObject;
import com.rarchives.ripme.ripper.AbstractJSONRipper; import com.rarchives.ripme.ripper.AbstractJSONRipper;
import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.utils.Http;
import org.jsoup.Connection.Response;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
public class InstagramRipper extends AbstractJSONRipper { public class InstagramRipper extends AbstractJSONRipper {
private String userID; private String userID;
@ -73,11 +78,21 @@ public class InstagramRipper extends AbstractJSONRipper {
public JSONObject getFirstPage() throws IOException { public JSONObject getFirstPage() throws IOException {
userID = getUserID(url); userID = getUserID(url);
String baseURL = "http://instagram.com/" + userID + "/media"; String jsonText = "";
try { try {
return Http.url(baseURL).getJSON(); Document firstPage = Http.url("http://instagram.com/" + userID).get();
for (Element script : firstPage.select("script[type=text/javascript]")) {
logger.info("Found script");
if (script.data().contains("window._sharedData = ")) {
jsonText = script.data().replaceAll("window._sharedData = ", "");
jsonText = jsonText.replaceAll("};", "}");
}
}
logger.debug(jsonText);
return new JSONObject(jsonText);
} catch (JSONException e) { } catch (JSONException e) {
throw new IOException("Could not get instagram user via: " + baseURL); throw new IOException("Could not get instagram user");
} }
} }
@ -152,28 +167,30 @@ public class InstagramRipper extends AbstractJSONRipper {
@Override @Override
public List<String> getURLsFromJSON(JSONObject json) { public List<String> getURLsFromJSON(JSONObject json) {
List<String> imageURLs = new ArrayList<>(); List<String> imageURLs = new ArrayList<>();
JSONArray datas = json.getJSONArray("items"); JSONArray profilePage = json.getJSONObject("entry_data").getJSONArray("ProfilePage");
JSONArray datas = profilePage.getJSONObject(0).getJSONObject("user").getJSONObject("media").getJSONArray("nodes");
for (int i = 0; i < datas.length(); i++) { for (int i = 0; i < datas.length(); i++) {
JSONObject data = (JSONObject) datas.get(i); JSONObject data = (JSONObject) datas.get(i);
imageURLs.add(getOriginalUrl(data.getString("thumbnail_src")));
String dataType = data.getString("type"); // String dataType = data.getString("type");
if (dataType.equals("carousel")) { // if (dataType.equals("carousel")) {
JSONArray carouselMedias = data.getJSONArray("carousel_media"); // JSONArray carouselMedias = data.getJSONArray("carousel_media");
for (int carouselIndex = 0; carouselIndex < carouselMedias.length(); carouselIndex++) { // for (int carouselIndex = 0; carouselIndex < carouselMedias.length(); carouselIndex++) {
JSONObject carouselMedia = (JSONObject) carouselMedias.get(carouselIndex); // JSONObject carouselMedia = (JSONObject) carouselMedias.get(carouselIndex);
String imageURL = getMedia(carouselMedia); // String imageURL = getMedia(carouselMedia);
if (!imageURL.equals("")) { // if (!imageURL.equals("")) {
imageURL = getOriginalUrl(imageURL); // imageURL = getOriginalUrl(imageURL);
imageURLs.add(imageURL); // imageURLs.add(imageURL);
} // }
} // }
} else { // } else {
String imageURL = getMedia(data); // String imageURL = getMedia(data);
if (!imageURL.equals("")) { // if (!imageURL.equals("")) {
imageURL = getOriginalUrl(imageURL); // imageURL = getOriginalUrl(imageURL);
imageURLs.add(imageURL); // imageURLs.add(imageURL);
} // }
} // }
if (isThisATest()) { if (isThisATest()) {
break; break;