commit
d8170fcf64
@ -11,6 +11,7 @@ import org.jsoup.nodes.Document;
|
|||||||
|
|
||||||
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
|
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
|
||||||
import com.rarchives.ripme.utils.Utils;
|
import com.rarchives.ripme.utils.Utils;
|
||||||
|
import com.rarchives.ripme.ui.MainWindow;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Simplified ripper, designed for ripping from sites by parsing HTML.
|
* Simplified ripper, designed for ripping from sites by parsing HTML.
|
||||||
@ -53,12 +54,29 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
|
|||||||
protected boolean hasDescriptionSupport() {
|
protected boolean hasDescriptionSupport() {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected String[] getDescription(String url, Document page) throws IOException {
|
protected String[] getDescription(String url, Document page) throws IOException {
|
||||||
throw new IOException("getDescription not implemented"); // Do I do this or make an abstract function?
|
throw new IOException("getDescription not implemented"); // Do I do this or make an abstract function?
|
||||||
}
|
}
|
||||||
protected int descSleepTime() {
|
protected int descSleepTime() {
|
||||||
return 100;
|
return 100;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected List<String> getAlbumsToQueue(Document doc) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If a page has Queue support then it has no images we want to download, just a list of urls we want to add to
|
||||||
|
// the queue
|
||||||
|
protected boolean hasQueueSupport() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Takes a url and checks if it is for a page of albums
|
||||||
|
protected boolean pageContainsAlbums(URL url) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void rip() throws IOException {
|
public void rip() throws IOException {
|
||||||
int index = 0;
|
int index = 0;
|
||||||
@ -67,6 +85,16 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
|
|||||||
sendUpdate(STATUS.LOADING_RESOURCE, this.url.toExternalForm());
|
sendUpdate(STATUS.LOADING_RESOURCE, this.url.toExternalForm());
|
||||||
Document doc = getFirstPage();
|
Document doc = getFirstPage();
|
||||||
|
|
||||||
|
if (hasQueueSupport() && pageContainsAlbums(this.url)) {
|
||||||
|
List<String> urls = getAlbumsToQueue(doc);
|
||||||
|
for (String url : urls) {
|
||||||
|
MainWindow.addUrlToQueue(url);
|
||||||
|
}
|
||||||
|
|
||||||
|
// We set doc to null here so the while loop below this doesn't fire
|
||||||
|
doc = null;
|
||||||
|
}
|
||||||
|
|
||||||
while (doc != null) {
|
while (doc != null) {
|
||||||
if (alreadyDownloadedUrls >= Utils.getConfigInteger("history.end_rip_after_already_seen", 1000000000) && !isThisATest()) {
|
if (alreadyDownloadedUrls >= Utils.getConfigInteger("history.end_rip_after_already_seen", 1000000000) && !isThisATest()) {
|
||||||
sendUpdate(STATUS.DOWNLOAD_COMPLETE, "Already seen the last " + alreadyDownloadedUrls + " images ending rip");
|
sendUpdate(STATUS.DOWNLOAD_COMPLETE, "Already seen the last " + alreadyDownloadedUrls + " images ending rip");
|
||||||
|
@ -34,21 +34,18 @@ public class MyhentaicomicsRipper extends AbstractHTMLRipper {
|
|||||||
Pattern p = Pattern.compile("^https?://myhentaicomics.com/index.php/([a-zA-Z0-9-]*)/?$");
|
Pattern p = Pattern.compile("^https?://myhentaicomics.com/index.php/([a-zA-Z0-9-]*)/?$");
|
||||||
Matcher m = p.matcher(url.toExternalForm());
|
Matcher m = p.matcher(url.toExternalForm());
|
||||||
if (m.matches()) {
|
if (m.matches()) {
|
||||||
isTag = false;
|
|
||||||
return m.group(1);
|
return m.group(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
Pattern pa = Pattern.compile("^https?://myhentaicomics.com/index.php/search\\?q=([a-zA-Z0-9-]*)([a-zA-Z0-9=&]*)?$");
|
Pattern pa = Pattern.compile("^https?://myhentaicomics.com/index.php/search\\?q=([a-zA-Z0-9-]*)([a-zA-Z0-9=&]*)?$");
|
||||||
Matcher ma = pa.matcher(url.toExternalForm());
|
Matcher ma = pa.matcher(url.toExternalForm());
|
||||||
if (ma.matches()) {
|
if (ma.matches()) {
|
||||||
isTag = true;
|
|
||||||
return ma.group(1);
|
return ma.group(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
Pattern pat = Pattern.compile("^https?://myhentaicomics.com/index.php/tag/([0-9]*)/?([a-zA-Z%0-9+?=:]*)?$");
|
Pattern pat = Pattern.compile("^https?://myhentaicomics.com/index.php/tag/([0-9]*)/?([a-zA-Z%0-9+?=:]*)?$");
|
||||||
Matcher mat = pat.matcher(url.toExternalForm());
|
Matcher mat = pat.matcher(url.toExternalForm());
|
||||||
if (mat.matches()) {
|
if (mat.matches()) {
|
||||||
isTag = true;
|
|
||||||
return mat.group(1);
|
return mat.group(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -56,6 +53,37 @@ public class MyhentaicomicsRipper extends AbstractHTMLRipper {
|
|||||||
"myhentaicomics.com/index.php/albumName - got " + url + " instead");
|
"myhentaicomics.com/index.php/albumName - got " + url + " instead");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean hasQueueSupport() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean pageContainsAlbums(URL url) {
|
||||||
|
Pattern pa = Pattern.compile("^https?://myhentaicomics.com/index.php/search\\?q=([a-zA-Z0-9-]*)([a-zA-Z0-9=&]*)?$");
|
||||||
|
Matcher ma = pa.matcher(url.toExternalForm());
|
||||||
|
if (ma.matches()) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
Pattern pat = Pattern.compile("^https?://myhentaicomics.com/index.php/tag/([0-9]*)/?([a-zA-Z%0-9+?=:]*)?$");
|
||||||
|
Matcher mat = pat.matcher(url.toExternalForm());
|
||||||
|
if (mat.matches()) {
|
||||||
|
isTag = true;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<String> getAlbumsToQueue(Document doc) {
|
||||||
|
List<String> urlsToAddToQueue = new ArrayList<>();
|
||||||
|
for (Element elem : doc.select(".g-album > a")) {
|
||||||
|
urlsToAddToQueue.add(getDomain() + elem.attr("href"));
|
||||||
|
}
|
||||||
|
return urlsToAddToQueue;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Document getFirstPage() throws IOException {
|
public Document getFirstPage() throws IOException {
|
||||||
// "url" is an instance field of the superclass
|
// "url" is an instance field of the superclass
|
||||||
@ -81,161 +109,11 @@ public class MyhentaicomicsRipper extends AbstractHTMLRipper {
|
|||||||
return Http.url(nextUrl).get();
|
return Http.url(nextUrl).get();
|
||||||
}
|
}
|
||||||
|
|
||||||
// This replaces getNextPage when downloading from searchs and tags
|
|
||||||
private List<String> getNextAlbumPage(String pageUrl) {
|
|
||||||
List<String> albumPagesList = new ArrayList<>();
|
|
||||||
int pageNumber = 1;
|
|
||||||
albumPagesList.add("http://myhentaicomics.com/index.php/" + pageUrl.split("\\?")[0] + "?page=" + Integer.toString(pageNumber));
|
|
||||||
while (true) {
|
|
||||||
String urlToGet = "http://myhentaicomics.com/index.php/" + pageUrl.split("\\?")[0] + "?page=" + Integer.toString(pageNumber);
|
|
||||||
Document nextAlbumPage;
|
|
||||||
try {
|
|
||||||
logger.info("Grabbing " + urlToGet);
|
|
||||||
nextAlbumPage = Http.url(urlToGet).get();
|
|
||||||
} catch (IOException e) {
|
|
||||||
logger.warn("Failed to log link in Jsoup");
|
|
||||||
nextAlbumPage = null;
|
|
||||||
e.printStackTrace();
|
|
||||||
}
|
|
||||||
Element elem = nextAlbumPage.select("a.ui-icon-right").first();
|
|
||||||
String nextPage = elem.attr("href");
|
|
||||||
pageNumber = pageNumber + 1;
|
|
||||||
if (nextPage.equals("")) {
|
|
||||||
logger.info("Got " + pageNumber + " pages");
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
logger.info(nextPage);
|
|
||||||
albumPagesList.add(nextPage);
|
|
||||||
logger.info("Adding " + nextPage);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return albumPagesList;
|
|
||||||
}
|
|
||||||
|
|
||||||
private List<String> getAlbumsFromPage(String url) {
|
|
||||||
List<String> pagesToRip;
|
|
||||||
List<String> result = new ArrayList<>();
|
|
||||||
logger.info("Running getAlbumsFromPage");
|
|
||||||
Document doc;
|
|
||||||
try {
|
|
||||||
doc = Http.url("http://myhentaicomics.com" + url).get();
|
|
||||||
} catch (IOException e) {
|
|
||||||
logger.warn("Failed to log link in Jsoup");
|
|
||||||
doc = null;
|
|
||||||
e.printStackTrace();
|
|
||||||
}
|
|
||||||
// This for goes over every album on the page
|
|
||||||
for (Element elem : doc.select("li.g-album > a")) {
|
|
||||||
String link = elem.attr("href");
|
|
||||||
logger.info("Grabbing album " + link);
|
|
||||||
pagesToRip = getNextAlbumPage(link);
|
|
||||||
logger.info(pagesToRip);
|
|
||||||
for (String element : pagesToRip) {
|
|
||||||
Document album_doc;
|
|
||||||
try {
|
|
||||||
logger.info("grabbing " + element + " with jsoup");
|
|
||||||
boolean startsWithHttp = element.startsWith("http://");
|
|
||||||
if (!startsWithHttp) {
|
|
||||||
album_doc = Http.url("http://myhentaicomics.com/" + element).get();
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
album_doc = Http.url(element).get();
|
|
||||||
}
|
|
||||||
} catch (IOException e) {
|
|
||||||
logger.warn("Failed to log link in Jsoup");
|
|
||||||
album_doc = null;
|
|
||||||
e.printStackTrace();
|
|
||||||
}
|
|
||||||
for (Element el :album_doc.select("img")) {
|
|
||||||
String imageSource = el.attr("src");
|
|
||||||
// This bool is here so we don't try and download the site logo
|
|
||||||
if (!imageSource.startsWith("http://")) {
|
|
||||||
// We replace thumbs with resizes so we can the full sized images
|
|
||||||
imageSource = imageSource.replace("thumbs", "resizes");
|
|
||||||
String url_string = "http://myhentaicomics.com/" + imageSource;
|
|
||||||
url_string = url_string.replace("%20", "_");
|
|
||||||
url_string = url_string.replace("%27", "");
|
|
||||||
url_string = url_string.replace("%28", "_");
|
|
||||||
url_string = url_string.replace("%29", "_");
|
|
||||||
url_string = url_string.replace("%2C", "_");
|
|
||||||
if (isTag) {
|
|
||||||
logger.info("Downloading from a tag or search");
|
|
||||||
try {
|
|
||||||
sleep(500);
|
|
||||||
result.add("http://myhentaicomics.com/" + imageSource);
|
|
||||||
addURLToDownload(new URL("http://myhentaicomics.com/" + imageSource), "", url_string.split("/")[6]);
|
|
||||||
}
|
|
||||||
catch (MalformedURLException e) {
|
|
||||||
logger.warn("Malformed URL");
|
|
||||||
e.printStackTrace();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
private List<String> getListOfPages(Document doc) {
|
|
||||||
List<String> pages = new ArrayList<>();
|
|
||||||
// Get the link from the last button
|
|
||||||
String nextPageUrl = doc.select("a.ui-icon-right").last().attr("href");
|
|
||||||
Pattern pat = Pattern.compile("/index\\.php/tag/[0-9]*/[a-zA-Z0-9_\\-:+]*\\?page=(\\d+)");
|
|
||||||
Matcher mat = pat.matcher(nextPageUrl);
|
|
||||||
if (mat.matches()) {
|
|
||||||
logger.debug("Getting pages from a tag");
|
|
||||||
String base_link = mat.group(0).replaceAll("\\?page=\\d+", "");
|
|
||||||
logger.debug("base_link is " + base_link);
|
|
||||||
int numOfPages = Integer.parseInt(mat.group(1));
|
|
||||||
for (int x = 1; x != numOfPages +1; x++) {
|
|
||||||
logger.debug("running loop");
|
|
||||||
String link = base_link + "?page=" + Integer.toString(x);
|
|
||||||
pages.add(link);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
Pattern pa = Pattern.compile("/index\\.php/search\\?q=[a-zA-Z0-9_\\-:]*&page=(\\d+)");
|
|
||||||
Matcher ma = pa.matcher(nextPageUrl);
|
|
||||||
if (ma.matches()) {
|
|
||||||
logger.debug("Getting pages from a search");
|
|
||||||
String base_link = ma.group(0).replaceAll("page=\\d+", "");
|
|
||||||
logger.debug("base_link is " + base_link);
|
|
||||||
int numOfPages = Integer.parseInt(ma.group(1));
|
|
||||||
for (int x = 1; x != numOfPages +1; x++) {
|
|
||||||
logger.debug("running loop");
|
|
||||||
String link = base_link + "page=" + Integer.toString(x);
|
|
||||||
logger.debug(link);
|
|
||||||
pages.add(link);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return pages;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public List<String> getURLsFromPage(Document doc) {
|
public List<String> getURLsFromPage(Document doc) {
|
||||||
List<String> result = new ArrayList<>();
|
List<String> result = new ArrayList<>();
|
||||||
// Checks if this is a comic page or a page of albums
|
|
||||||
// If true the page is a page of albums
|
|
||||||
if (doc.toString().contains("class=\"g-item g-album\"")) {
|
|
||||||
// This if checks that there is more than 1 page
|
|
||||||
if (!doc.select("a.ui-icon-right").last().attr("href").equals("")) {
|
|
||||||
// There is more than one page so we call getListOfPages
|
|
||||||
List<String> pagesToRip = getListOfPages(doc);
|
|
||||||
logger.debug("Pages to rip = " + pagesToRip);
|
|
||||||
for (String url : pagesToRip) {
|
|
||||||
logger.debug("Getting albums from " + url);
|
|
||||||
result = getAlbumsFromPage(url);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
logger.debug("There is only one page on this page of albums");
|
|
||||||
// There is only 1 page so we call getAlbumsFromPage and pass it the page url
|
|
||||||
result = getAlbumsFromPage(doc.select("div.g-description > a").attr("href"));
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
for (Element el : doc.select("img")) {
|
for (Element el : doc.select("img")) {
|
||||||
String imageSource = el.attr("src");
|
String imageSource = el.attr("src");
|
||||||
// This bool is here so we don't try and download the site logo
|
// This bool is here so we don't try and download the site logo
|
||||||
@ -245,7 +123,6 @@ public class MyhentaicomicsRipper extends AbstractHTMLRipper {
|
|||||||
result.add("http://myhentaicomics.com/" + imageSource);
|
result.add("http://myhentaicomics.com/" + imageSource);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -162,6 +162,11 @@ public final class MainWindow implements Runnable, RipStatusHandler {
|
|||||||
return checkbox;
|
return checkbox;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public static void addUrlToQueue(String url) {
|
||||||
|
queueListModel.addElement(url);
|
||||||
|
}
|
||||||
|
|
||||||
public MainWindow() {
|
public MainWindow() {
|
||||||
mainFrame = new JFrame("RipMe v" + UpdateUtils.getThisJarVersion());
|
mainFrame = new JFrame("RipMe v" + UpdateUtils.getThisJarVersion());
|
||||||
mainFrame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
|
mainFrame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
|
||||||
|
Loading…
Reference in New Issue
Block a user