Merge pull request #385 from cyian-1756/ig-image-fixes

Instagram ripper no longer 403s on certain images
This commit is contained in:
cyian-1756 2018-01-11 16:29:20 -05:00 committed by GitHub
commit b5e9e13adf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -161,6 +161,8 @@ public class InstagramRipper extends AbstractHTMLRipper {
} }
private String getOriginalUrl(String imageURL) { private String getOriginalUrl(String imageURL) {
// Without this regex most images will return a 403 error
imageURL = imageURL.replaceAll("vp/[a-zA-Z0-9]*/", "");
imageURL = imageURL.replaceAll("scontent.cdninstagram.com/hphotos-", "igcdn-photos-d-a.akamaihd.net/hphotos-ak-"); imageURL = imageURL.replaceAll("scontent.cdninstagram.com/hphotos-", "igcdn-photos-d-a.akamaihd.net/hphotos-ak-");
// TODO replace this with a single regex // TODO replace this with a single regex
imageURL = imageURL.replaceAll("p150x150/", ""); imageURL = imageURL.replaceAll("p150x150/", "");
@ -178,6 +180,7 @@ public class InstagramRipper extends AbstractHTMLRipper {
imageURL = imageURL.replaceAll("s1080x1080/", ""); imageURL = imageURL.replaceAll("s1080x1080/", "");
imageURL = imageURL.replaceAll("s2048x2048/", ""); imageURL = imageURL.replaceAll("s2048x2048/", "");
// Instagram returns cropped images to unauthenticated applications to maintain legacy support. // Instagram returns cropped images to unauthenticated applications to maintain legacy support.
// To retrieve the uncropped image, remove this segment from the URL. // To retrieve the uncropped image, remove this segment from the URL.
// Segment format: cX.Y.W.H - eg: c0.134.1080.1080 // Segment format: cX.Y.W.H - eg: c0.134.1080.1080
@ -232,7 +235,7 @@ public class InstagramRipper extends AbstractHTMLRipper {
if (imageURLs.size() == 0) { if (imageURLs.size() == 0) {
// We add this one item to the array because otherwise // We add this one item to the array because otherwise
// the ripper will error out because we returned an empty array // the ripper will error out because we returned an empty array
imageURLs.add(data.getString("thumbnail_src")); imageURLs.add(getOriginalUrl(data.getString("thumbnail_src")));
} }
addURLToDownload(new URL(getOriginalUrl(data.getString("thumbnail_src"))), image_date); addURLToDownload(new URL(getOriginalUrl(data.getString("thumbnail_src"))), image_date);
} else { } else {