Merge pull request #693 from cyian-1756/e621UrlRegexFix
E621 ripper now accepts URLs with order:Score at the end
This commit is contained in:
commit
fa3d149bd2
@ -96,40 +96,40 @@ public class E621Ripper extends AbstractHTMLRipper{
|
|||||||
|
|
||||||
private String getTerm(URL url) throws MalformedURLException{
|
private String getTerm(URL url) throws MalformedURLException{
|
||||||
if(gidPattern==null)
|
if(gidPattern==null)
|
||||||
gidPattern=Pattern.compile("^https?://(www\\.)?e621\\.net/post/index/[^/]+/([a-zA-Z0-9$_.+!*'(),%-]+)(/.*)?(#.*)?$");
|
gidPattern=Pattern.compile("^https?://(www\\.)?e621\\.net/post/index/[^/]+/([a-zA-Z0-9$_.+!*'():,%\\-]+)(/.*)?(#.*)?$");
|
||||||
if(gidPatternPool==null)
|
if(gidPatternPool==null)
|
||||||
gidPatternPool=Pattern.compile("^https?://(www\\.)?e621\\.net/pool/show/([a-zA-Z0-9$_.+!*'(),%-]+)(\\?.*)?(/.*)?(#.*)?$");
|
gidPatternPool=Pattern.compile("^https?://(www\\.)?e621\\.net/pool/show/([a-zA-Z0-9$_.+!*'(),%:\\-]+)(\\?.*)?(/.*)?(#.*)?$");
|
||||||
|
|
||||||
Matcher m = gidPattern.matcher(url.toExternalForm());
|
Matcher m = gidPattern.matcher(url.toExternalForm());
|
||||||
if(m.matches())
|
if(m.matches()) {
|
||||||
return m.group(2);
|
LOGGER.info(m.group(2));
|
||||||
|
return m.group(2);
|
||||||
|
}
|
||||||
|
|
||||||
m = gidPatternPool.matcher(url.toExternalForm());
|
m = gidPatternPool.matcher(url.toExternalForm());
|
||||||
if(m.matches())
|
if(m.matches()) {
|
||||||
return m.group(2);
|
return m.group(2);
|
||||||
|
}
|
||||||
|
|
||||||
throw new MalformedURLException("Expected e621.net URL format: e621.net/post/index/1/searchterm - got "+url+" instead");
|
throw new MalformedURLException("Expected e621.net URL format: e621.net/post/index/1/searchterm - got "+url+" instead");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getGID(URL url) throws MalformedURLException {
|
public String getGID(URL url) throws MalformedURLException {
|
||||||
try {
|
|
||||||
String prefix="";
|
String prefix="";
|
||||||
if(url.getPath().startsWith("/pool/show/"))
|
if (url.getPath().startsWith("/pool/show/")) {
|
||||||
prefix="pool_";
|
prefix = "pool_";
|
||||||
|
}
|
||||||
|
|
||||||
return Utils.filesystemSafe(prefix+new URI(getTerm(url)).getPath());
|
return Utils.filesystemSafe(prefix+getTerm(url));
|
||||||
} catch (URISyntaxException ex) {
|
|
||||||
logger.error(ex);
|
|
||||||
}
|
|
||||||
|
|
||||||
throw new MalformedURLException("Expected e621.net URL format: e621.net/post/index/1/searchterm - got "+url+" instead");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public URL sanitizeURL(URL url) throws MalformedURLException {
|
public URL sanitizeURL(URL url) throws MalformedURLException {
|
||||||
if(gidPattern2==null)
|
if(gidPattern2==null)
|
||||||
gidPattern2=Pattern.compile("^https?://(www\\.)?e621\\.net/post/search\\?tags=([a-zA-Z0-9$_.+!*'(),%-]+)(/.*)?(#.*)?$");
|
gidPattern2=Pattern.compile("^https?://(www\\.)?e621\\.net/post/search\\?tags=([a-zA-Z0-9$_.+!*'():,%-]+)(/.*)?(#.*)?$");
|
||||||
|
|
||||||
Matcher m = gidPattern2.matcher(url.toExternalForm());
|
Matcher m = gidPattern2.matcher(url.toExternalForm());
|
||||||
if(m.matches())
|
if(m.matches())
|
||||||
|
Loading…
Reference in New Issue
Block a user