Merge pull request #220 from cyian-1756/ig_regex
Improved Instagram regex and added sanitizeURL
This commit is contained in:
commit
73b165c645
@ -43,6 +43,13 @@ public class InstagramRipper extends AbstractHTMLRipper {
|
||||
return (url.getHost().endsWith("instagram.com"));
|
||||
}
|
||||
|
||||
@Override
|
||||
public URL sanitizeURL(URL url) throws MalformedURLException {
|
||||
URL san_url = new URL(url.toExternalForm().replaceAll("\\?hl=\\S*", ""));
|
||||
logger.info("sanitized URL is " + san_url.toExternalForm());
|
||||
return san_url;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getGID(URL url) throws MalformedURLException {
|
||||
Pattern p = Pattern.compile("^https?://instagram.com/([^/]+)/?");
|
||||
@ -51,7 +58,7 @@ public class InstagramRipper extends AbstractHTMLRipper {
|
||||
return m.group(1);
|
||||
}
|
||||
|
||||
p = Pattern.compile("^https?://www.instagram.com/([^/]+)/?");
|
||||
p = Pattern.compile("^https?://www.instagram.com/([^/]+)/?(?:\\?hl=\\S*)?/?");
|
||||
m = p.matcher(url.toExternalForm());
|
||||
if (m.matches()) {
|
||||
return m.group(1);
|
||||
|
Loading…
x
Reference in New Issue
Block a user