Attempt at fixing issue #330 (Instagram API changes)
This commit is contained in:
parent
6321aba898
commit
08ce833863
@ -37,14 +37,12 @@ public class InstagramRipper extends AbstractJSONRipper {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean canRip(URL url) {
|
public boolean canRip(URL url) {
|
||||||
return (url.getHost().endsWith("instagram.com")
|
return (url.getHost().endsWith("instagram.com"));
|
||||||
|| url.getHost().endsWith("statigr.am")
|
|
||||||
|| url.getHost().endsWith("iconosquare.com/"));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getGID(URL url) throws MalformedURLException {
|
public String getGID(URL url) throws MalformedURLException {
|
||||||
Pattern p = Pattern.compile("^https?://iconosquare.com/([a-zA-Z0-9\\-_.]{3,}).*$");
|
Pattern p = Pattern.compile("^https?://instagram.com/([^/]+)");
|
||||||
Matcher m = p.matcher(url.toExternalForm());
|
Matcher m = p.matcher(url.toExternalForm());
|
||||||
if (m.matches()) {
|
if (m.matches()) {
|
||||||
return m.group(1);
|
return m.group(1);
|
||||||
@ -54,106 +52,70 @@ public class InstagramRipper extends AbstractJSONRipper {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public URL sanitizeURL(URL url) throws MalformedURLException {
|
public URL sanitizeURL(URL url) throws MalformedURLException {
|
||||||
Pattern p = Pattern.compile("^https?://instagram\\.com/p/([a-zA-Z0-9\\-_.]{1,}).*$");
|
Pattern p = Pattern.compile("^.*instagram\\.com/([a-zA-Z0-9\\-_.]{3,}).*$");
|
||||||
Matcher m = p.matcher(url.toExternalForm());
|
Matcher m = p.matcher(url.toExternalForm());
|
||||||
if (m.matches()) {
|
if (m.matches()) {
|
||||||
// Link to photo, not the user account
|
return new URL("http://instagram.com/" + m.group(1));
|
||||||
try {
|
|
||||||
url = getUserPageFromImage(url);
|
|
||||||
} catch (Exception e) {
|
|
||||||
logger.error("[!] Failed to get user page from " + url, e);
|
|
||||||
throw new MalformedURLException("Failed to retrieve user page from " + url);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
p = Pattern.compile("^.*instagram\\.com/([a-zA-Z0-9\\-_.]{3,}).*$");
|
|
||||||
m = p.matcher(url.toExternalForm());
|
|
||||||
if (m.matches()) {
|
|
||||||
return new URL("http://iconosquare.com/" + m.group(1));
|
|
||||||
}
|
|
||||||
p = Pattern.compile("^.*iconosquare\\.com/([a-zA-Z0-9\\-_.]{3,}).*$");
|
|
||||||
m = p.matcher(url.toExternalForm());
|
|
||||||
if (m.matches()) {
|
|
||||||
return new URL("http://iconosquare.com/" + m.group(1));
|
|
||||||
}
|
|
||||||
p = Pattern.compile("^.*statigr\\.am/([a-zA-Z0-9\\-_.]{3,}).*$");
|
|
||||||
m = p.matcher(url.toExternalForm());
|
|
||||||
if (m.matches()) {
|
|
||||||
return new URL("http://iconosquare.com/" + m.group(1));
|
|
||||||
}
|
|
||||||
throw new MalformedURLException("Expected username in URL (instagram.com/username and not " + url);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private URL getUserPageFromImage(URL url) throws IOException {
|
|
||||||
Document doc = Http.url(url).get();
|
|
||||||
for (Element element : doc.select("meta[property='og:description']")) {
|
|
||||||
String content = element.attr("content");
|
|
||||||
if (content.endsWith("'s photo on Instagram")) {
|
|
||||||
return new URL("http://iconosquare/" + content.substring(0, content.indexOf("'")));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
throw new MalformedURLException("Expected username in URL (instagram.com/username and not " + url);
|
throw new MalformedURLException("Expected username in URL (instagram.com/username and not " + url);
|
||||||
}
|
}
|
||||||
|
|
||||||
private String getUserID(URL url) throws IOException {
|
private String getUserID(URL url) throws IOException {
|
||||||
this.sendUpdate(STATUS.LOADING_RESOURCE, url.toExternalForm());
|
|
||||||
Document doc = Http.url(url).get();
|
Pattern p = Pattern.compile("^https?://instagram\\.com/([^/]+)");
|
||||||
for (Element element : doc.select("input[id=user_public]")) {
|
Matcher m = p.matcher(url.toExternalForm());
|
||||||
return element.attr("value");
|
if(m.matches()) {
|
||||||
|
return m.group(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
throw new IOException("Unable to find userID at " + this.url);
|
throw new IOException("Unable to find userID at " + this.url);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public JSONObject getFirstPage() throws IOException {
|
public JSONObject getFirstPage() throws IOException {
|
||||||
userID = getUserID(url);
|
userID = getUserID(url);
|
||||||
String baseURL = "http://iconosquare.com/controller_nl.php?action=getPhotoUserPublic&user_id="
|
|
||||||
+ userID;
|
String baseURL = "http://instagram.com/" + userID + "/media";
|
||||||
logger.info("Loading " + baseURL);
|
|
||||||
try {
|
try {
|
||||||
JSONObject result = Http.url(baseURL).getJSON();
|
JSONObject result = Http.url(baseURL).getJSON();
|
||||||
return result;
|
return result;
|
||||||
} catch (JSONException e) {
|
} catch (JSONException e) {
|
||||||
throw new IOException("Could not get instagram user via iconosquare", e);
|
throw new IOException("Could not get instagram user via: " + baseURL);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public JSONObject getNextPage(JSONObject json) throws IOException {
|
public JSONObject getNextPage(JSONObject json) throws IOException {
|
||||||
if (isThisATest()) {
|
|
||||||
return null;
|
boolean nextPageAvailable;
|
||||||
|
try {
|
||||||
|
nextPageAvailable = json.getBoolean("more_available");
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new IOException("No additional pages found");
|
||||||
}
|
}
|
||||||
JSONObject pagination = json.getJSONObject("pagination");
|
|
||||||
String nextMaxID = "";
|
if(nextPageAvailable) {
|
||||||
JSONArray datas = json.getJSONArray("data");
|
JSONArray items = json.getJSONArray("items");
|
||||||
for (int i = 0; i < datas.length(); i++) {
|
JSONObject last_item = items.getJSONObject(items.length() - 1);
|
||||||
JSONObject data = datas.getJSONObject(i);
|
String nextMaxID = last_item.getString("id");
|
||||||
if (data.has("id")) {
|
|
||||||
nextMaxID = data.getString("id");
|
String baseURL = "http://instagram.com/" + userID + "/media/?max_id=" + nextMaxID;
|
||||||
}
|
|
||||||
}
|
|
||||||
if (nextMaxID.equals("")) {
|
|
||||||
if (!pagination.has("next_max_id")) {
|
|
||||||
throw new IOException("No next_max_id found, stopping");
|
|
||||||
}
|
|
||||||
nextMaxID = pagination.getString("next_max_id");
|
|
||||||
}
|
|
||||||
String baseURL = "http://iconosquare.com/controller_nl.php?action=getPhotoUserPublic&user_id="
|
|
||||||
+ userID
|
|
||||||
+ "&max_id=" + nextMaxID;
|
|
||||||
logger.info("Loading " + baseURL);
|
logger.info("Loading " + baseURL);
|
||||||
sleep(1000);
|
sleep(1000);
|
||||||
|
|
||||||
JSONObject nextJSON = Http.url(baseURL).getJSON();
|
JSONObject nextJSON = Http.url(baseURL).getJSON();
|
||||||
datas = nextJSON.getJSONArray("data");
|
|
||||||
if (datas.length() == 0) {
|
return nextJSON;
|
||||||
|
} else {
|
||||||
throw new IOException("No more images found");
|
throw new IOException("No more images found");
|
||||||
}
|
}
|
||||||
return nextJSON;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public List<String> getURLsFromJSON(JSONObject json) {
|
public List<String> getURLsFromJSON(JSONObject json) {
|
||||||
List<String> imageURLs = new ArrayList<String>();
|
List<String> imageURLs = new ArrayList<String>();
|
||||||
JSONArray datas = json.getJSONArray("data");
|
JSONArray datas = json.getJSONArray("items");
|
||||||
for (int i = 0; i < datas.length(); i++) {
|
for (int i = 0; i < datas.length(); i++) {
|
||||||
JSONObject data = (JSONObject) datas.get(i);
|
JSONObject data = (JSONObject) datas.get(i);
|
||||||
String imageURL;
|
String imageURL;
|
||||||
@ -166,6 +128,7 @@ public class InstagramRipper extends AbstractJSONRipper {
|
|||||||
}
|
}
|
||||||
imageURL = imageURL.replaceAll("scontent.cdninstagram.com/hphotos-", "igcdn-photos-d-a.akamaihd.net/hphotos-ak-");
|
imageURL = imageURL.replaceAll("scontent.cdninstagram.com/hphotos-", "igcdn-photos-d-a.akamaihd.net/hphotos-ak-");
|
||||||
imageURL = imageURL.replaceAll("s640x640/", "");
|
imageURL = imageURL.replaceAll("s640x640/", "");
|
||||||
|
imageURL = imageURL.replaceAll("\\?ig_cache_key.+$", "");
|
||||||
imageURLs.add(imageURL);
|
imageURLs.add(imageURL);
|
||||||
if (isThisATest()) {
|
if (isThisATest()) {
|
||||||
break;
|
break;
|
||||||
|
Loading…
Reference in New Issue
Block a user