Improved Instagram regex and added sanitizeURL
This commit is contained in:
parent
67db5f3d99
commit
5c185c05df
@ -43,6 +43,13 @@ public class InstagramRipper extends AbstractHTMLRipper {
|
|||||||
return (url.getHost().endsWith("instagram.com"));
|
return (url.getHost().endsWith("instagram.com"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public URL sanitizeURL(URL url) throws MalformedURLException {
|
||||||
|
URL san_url = new URL(url.toExternalForm().replaceAll("\\?hl=\\S*", ""));
|
||||||
|
logger.info("sanitized URL is " + san_url.toExternalForm());
|
||||||
|
return san_url;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getGID(URL url) throws MalformedURLException {
|
public String getGID(URL url) throws MalformedURLException {
|
||||||
Pattern p = Pattern.compile("^https?://instagram.com/([^/]+)/?");
|
Pattern p = Pattern.compile("^https?://instagram.com/([^/]+)/?");
|
||||||
@ -51,7 +58,7 @@ public class InstagramRipper extends AbstractHTMLRipper {
|
|||||||
return m.group(1);
|
return m.group(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
p = Pattern.compile("^https?://www.instagram.com/([^/]+)/?");
|
p = Pattern.compile("^https?://www.instagram.com/([^/]+)/?(?:\\?hl=\\S*)?/?");
|
||||||
m = p.matcher(url.toExternalForm());
|
m = p.matcher(url.toExternalForm());
|
||||||
if (m.matches()) {
|
if (m.matches()) {
|
||||||
return m.group(1);
|
return m.group(1);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user