Attempt at fixing issue #330: Instagram API changes

This commit is contained in:
Kastang 2016-06-24 18:07:25 -04:00 committed by metaprime
parent 6321aba898
commit 08ce833863

View File

@ -37,14 +37,12 @@ public class InstagramRipper extends AbstractJSONRipper {
@Override @Override
public boolean canRip(URL url) { public boolean canRip(URL url) {
return (url.getHost().endsWith("instagram.com") return (url.getHost().endsWith("instagram.com"));
|| url.getHost().endsWith("statigr.am")
|| url.getHost().endsWith("iconosquare.com/"));
} }
@Override @Override
public String getGID(URL url) throws MalformedURLException { public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("^https?://iconosquare.com/([a-zA-Z0-9\\-_.]{3,}).*$"); Pattern p = Pattern.compile("^https?://instagram.com/([^/]+)");
Matcher m = p.matcher(url.toExternalForm()); Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) { if (m.matches()) {
return m.group(1); return m.group(1);
@ -54,106 +52,70 @@ public class InstagramRipper extends AbstractJSONRipper {
@Override @Override
public URL sanitizeURL(URL url) throws MalformedURLException { public URL sanitizeURL(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("^https?://instagram\\.com/p/([a-zA-Z0-9\\-_.]{1,}).*$"); Pattern p = Pattern.compile("^.*instagram\\.com/([a-zA-Z0-9\\-_.]{3,}).*$");
Matcher m = p.matcher(url.toExternalForm()); Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) { if (m.matches()) {
// Link to photo, not the user account return new URL("http://instagram.com/" + m.group(1));
try {
url = getUserPageFromImage(url);
} catch (Exception e) {
logger.error("[!] Failed to get user page from " + url, e);
throw new MalformedURLException("Failed to retrieve user page from " + url);
}
} }
p = Pattern.compile("^.*instagram\\.com/([a-zA-Z0-9\\-_.]{3,}).*$");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
return new URL("http://iconosquare.com/" + m.group(1));
}
p = Pattern.compile("^.*iconosquare\\.com/([a-zA-Z0-9\\-_.]{3,}).*$");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
return new URL("http://iconosquare.com/" + m.group(1));
}
p = Pattern.compile("^.*statigr\\.am/([a-zA-Z0-9\\-_.]{3,}).*$");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
return new URL("http://iconosquare.com/" + m.group(1));
}
throw new MalformedURLException("Expected username in URL (instagram.com/username and not " + url);
}
private URL getUserPageFromImage(URL url) throws IOException {
Document doc = Http.url(url).get();
for (Element element : doc.select("meta[property='og:description']")) {
String content = element.attr("content");
if (content.endsWith("'s photo on Instagram")) {
return new URL("http://iconosquare/" + content.substring(0, content.indexOf("'")));
}
}
throw new MalformedURLException("Expected username in URL (instagram.com/username and not " + url); throw new MalformedURLException("Expected username in URL (instagram.com/username and not " + url);
} }
private String getUserID(URL url) throws IOException { private String getUserID(URL url) throws IOException {
this.sendUpdate(STATUS.LOADING_RESOURCE, url.toExternalForm());
Document doc = Http.url(url).get(); Pattern p = Pattern.compile("^https?://instagram\\.com/([^/]+)");
for (Element element : doc.select("input[id=user_public]")) { Matcher m = p.matcher(url.toExternalForm());
return element.attr("value"); if(m.matches()) {
return m.group(1);
} }
throw new IOException("Unable to find userID at " + this.url); throw new IOException("Unable to find userID at " + this.url);
} }
@Override @Override
public JSONObject getFirstPage() throws IOException { public JSONObject getFirstPage() throws IOException {
userID = getUserID(url); userID = getUserID(url);
String baseURL = "http://iconosquare.com/controller_nl.php?action=getPhotoUserPublic&user_id="
+ userID; String baseURL = "http://instagram.com/" + userID + "/media";
logger.info("Loading " + baseURL);
try { try {
JSONObject result = Http.url(baseURL).getJSON(); JSONObject result = Http.url(baseURL).getJSON();
return result; return result;
} catch (JSONException e) { } catch (JSONException e) {
throw new IOException("Could not get instagram user via iconosquare", e); throw new IOException("Could not get instagram user via: " + baseURL);
} }
} }
@Override @Override
public JSONObject getNextPage(JSONObject json) throws IOException { public JSONObject getNextPage(JSONObject json) throws IOException {
if (isThisATest()) {
return null; boolean nextPageAvailable;
try {
nextPageAvailable = json.getBoolean("more_available");
} catch (Exception e) {
throw new IOException("No additional pages found");
} }
JSONObject pagination = json.getJSONObject("pagination");
String nextMaxID = ""; if(nextPageAvailable) {
JSONArray datas = json.getJSONArray("data"); JSONArray items = json.getJSONArray("items");
for (int i = 0; i < datas.length(); i++) { JSONObject last_item = items.getJSONObject(items.length() - 1);
JSONObject data = datas.getJSONObject(i); String nextMaxID = last_item.getString("id");
if (data.has("id")) {
nextMaxID = data.getString("id"); String baseURL = "http://instagram.com/" + userID + "/media/?max_id=" + nextMaxID;
} logger.info("Loading " + baseURL);
} sleep(1000);
if (nextMaxID.equals("")) {
if (!pagination.has("next_max_id")) { JSONObject nextJSON = Http.url(baseURL).getJSON();
throw new IOException("No next_max_id found, stopping");
} return nextJSON;
nextMaxID = pagination.getString("next_max_id"); } else {
}
String baseURL = "http://iconosquare.com/controller_nl.php?action=getPhotoUserPublic&user_id="
+ userID
+ "&max_id=" + nextMaxID;
logger.info("Loading " + baseURL);
sleep(1000);
JSONObject nextJSON = Http.url(baseURL).getJSON();
datas = nextJSON.getJSONArray("data");
if (datas.length() == 0) {
throw new IOException("No more images found"); throw new IOException("No more images found");
} }
return nextJSON;
} }
@Override @Override
public List<String> getURLsFromJSON(JSONObject json) { public List<String> getURLsFromJSON(JSONObject json) {
List<String> imageURLs = new ArrayList<String>(); List<String> imageURLs = new ArrayList<String>();
JSONArray datas = json.getJSONArray("data"); JSONArray datas = json.getJSONArray("items");
for (int i = 0; i < datas.length(); i++) { for (int i = 0; i < datas.length(); i++) {
JSONObject data = (JSONObject) datas.get(i); JSONObject data = (JSONObject) datas.get(i);
String imageURL; String imageURL;
@ -166,6 +128,7 @@ public class InstagramRipper extends AbstractJSONRipper {
} }
imageURL = imageURL.replaceAll("scontent.cdninstagram.com/hphotos-", "igcdn-photos-d-a.akamaihd.net/hphotos-ak-"); imageURL = imageURL.replaceAll("scontent.cdninstagram.com/hphotos-", "igcdn-photos-d-a.akamaihd.net/hphotos-ak-");
imageURL = imageURL.replaceAll("s640x640/", ""); imageURL = imageURL.replaceAll("s640x640/", "");
imageURL = imageURL.replaceAll("\\?ig_cache_key.+$", "");
imageURLs.add(imageURL); imageURLs.add(imageURL);
if (isThisATest()) { if (isThisATest()) {
break; break;