From e49cab1254a6f387b65d52d9497b6e48bbcf98bc Mon Sep 17 00:00:00 2001 From: Mads Date: Thu, 13 Mar 2014 20:13:01 +0100 Subject: [PATCH 1/5] Fixed RedditRipper (Removed GoneWildRipper). RedditRipper retries download on timeout (should be a global setting) --- .../ripme/ripper/rippers/GonewildRipper.java | 112 ------------------ .../ripme/ripper/rippers/RedditRipper.java | 26 +++- .../java/com/rarchives/ripme/utils/Utils.java | 22 ++++ .../ripper/rippers/GonewildRipperTest.java | 31 ----- 4 files changed, 42 insertions(+), 149 deletions(-) delete mode 100644 src/main/java/com/rarchives/ripme/ripper/rippers/GonewildRipper.java delete mode 100644 src/test/java/com/rarchives/ripme/tst/ripper/rippers/GonewildRipperTest.java diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/GonewildRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/GonewildRipper.java deleted file mode 100644 index 402db0f2..00000000 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/GonewildRipper.java +++ /dev/null @@ -1,112 +0,0 @@ -package com.rarchives.ripme.ripper.rippers; - -import java.io.IOException; -import java.net.MalformedURLException; -import java.net.URL; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -import org.apache.log4j.Logger; -import org.json.JSONArray; -import org.json.JSONObject; -import org.jsoup.Jsoup; - -import com.rarchives.ripme.ripper.AbstractRipper; -import com.rarchives.ripme.utils.Utils; - -public class GonewildRipper extends AbstractRipper { - - private static final String HOST = "gonewild"; - private static final Logger logger = Logger.getLogger(GonewildRipper.class); - private static final int SLEEP_TIME = 1000; - - private static String API_DOMAIN; - private String username; - - public GonewildRipper(URL url) throws IOException { - super(url); - API_DOMAIN = Utils.getConfigString("gw.api", "gonewild"); - } - - @Override - public boolean canRip(URL url) { - return getUsernameMatcher(url).matches(); - } - - private 
Matcher getUsernameMatcher(URL url) { - Pattern p = Pattern.compile("^https?://[a-z]{0,3}\\.?reddit\\.com/(u|user)/([a-zA-Z0-9\\-]{3,})/?.*$"); - return p.matcher(url.toExternalForm()); - } - - @Override - public URL sanitizeURL(URL url) throws MalformedURLException { - return url; - } - - @Override - public void rip() throws IOException { - int start = 0, - count = 50; - String baseGwURL = "http://" + API_DOMAIN + ".rarchives.com/api.cgi" - + "?method=get_user" - + "&user=" + username - + "&count=" + count; - String gwURL, jsonString, imagePath; - JSONArray posts, images; - JSONObject json, post, image; - while (true) { - logger.info(" Retrieving posts by " + username); - gwURL = baseGwURL - + "&start=" + start; - start += count; - jsonString = Jsoup.connect(gwURL) - .ignoreContentType(true) - .execute() - .body(); - json = new JSONObject(jsonString); - if (json.has("error")) { - logger.error("Error while retrieving user posts:" + json.getString("error")); - break; - } - posts = json.getJSONArray("posts"); - if (posts.length() == 0) { - break; // No more posts to get - } - for (int i = 0; i < posts.length(); i++) { - post = (JSONObject) posts.get(i); - images = post.getJSONArray("images"); - for (int j = 0; j < images.length(); j++) { - image = (JSONObject) images.get(j); - imagePath = image.getString("path"); - if (imagePath.startsWith("..")) { - imagePath = imagePath.substring(2); - } - imagePath = "http://" + API_DOMAIN + ".rarchives.com" + imagePath; - logger.info(" Found file: " + imagePath); - addURLToDownload(new URL(imagePath)); - } - } - try { - Thread.sleep(SLEEP_TIME); - } catch (InterruptedException e) { - logger.error("[!] 
Interrupted while waiting to load more posts", e); - break; - } - } - waitForThreads(); - } - - @Override - public String getHost() { - return HOST; - } - - @Override - public String getGID(URL url) throws MalformedURLException { - Matcher m = getUsernameMatcher(url); - if (m.matches()) { - this.username = m.group(m.groupCount()); - } - return username; - } -} diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/RedditRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/RedditRipper.java index f8011ad2..a6c03458 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/RedditRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/RedditRipper.java @@ -16,6 +16,8 @@ import org.jsoup.nodes.Document; import com.rarchives.ripme.ripper.AbstractRipper; import com.rarchives.ripme.utils.RipUtils; +import com.rarchives.ripme.utils.Utils; +import java.net.SocketTimeoutException; public class RedditRipper extends AbstractRipper { @@ -26,7 +28,7 @@ public class RedditRipper extends AbstractRipper { private static final String HOST = "reddit"; private static final String DOMAIN = "reddit.com"; - private static final Logger logger = Logger.getLogger(GonewildRipper.class); + private static final Logger logger = Logger.getLogger(RedditRipper.class); private static final int SLEEP_TIME = 2000; //private static final String USER_AGENT = "ripme by /u/4_pr0n github.com/4pr0n/ripme"; @@ -67,6 +69,8 @@ public class RedditRipper extends AbstractRipper { waitForThreads(); } + + private URL getAndParseAndReturnNext(URL url) throws IOException { JSONArray jsonArray = getJsonArrayFromURL(url), children; JSONObject json, data; @@ -85,7 +89,7 @@ public class RedditRipper extends AbstractRipper { parseJsonChild(children.getJSONObject(j)); } if (data.has("after") && !data.isNull("after")) { - String nextURLString = url.toExternalForm(); + String nextURLString = Utils.stripURLParameter(url.toExternalForm(), "after"); if (nextURLString.contains("?")) { 
nextURLString = nextURLString.concat("&after=" + data.getString("after")); } @@ -111,11 +115,21 @@ public class RedditRipper extends AbstractRipper { } lastRequestTime = System.currentTimeMillis(); + int attempts = 0; + Document doc = null; logger.info(" Retrieving " + url); - Document doc= Jsoup.connect(url.toExternalForm()) - .ignoreContentType(true) - .userAgent(USER_AGENT) - .get(); + while(doc == null && attempts++ < 3) { + try { + doc= Jsoup.connect(url.toExternalForm()) + .ignoreContentType(true) + .userAgent(USER_AGENT) + .get(); + } catch(SocketTimeoutException ex) { + if(attempts >= 3) throw ex; + logger.warn(String.format("[!] Connection timed out (attempt %d)", attempts)); + } + } + String jsonString = doc.body().html().replaceAll("&quot;", "\""); Object jsonObj = new JSONTokener(jsonString).nextValue(); diff --git a/src/main/java/com/rarchives/ripme/utils/Utils.java b/src/main/java/com/rarchives/ripme/utils/Utils.java index 8124c981..98039c26 100644 --- a/src/main/java/com/rarchives/ripme/utils/Utils.java +++ b/src/main/java/com/rarchives/ripme/utils/Utils.java @@ -83,6 +83,28 @@ public class Utils { } return prettySaveAs; } + + public static String stripURLParameter(String url, String parameter) { + int paramIndex = url.indexOf("?" 
+ parameter); + boolean wasFirstParam = true; + if(paramIndex < 0) { + wasFirstParam = false; + paramIndex = url.indexOf("&" + parameter); + } + + if(paramIndex > 0) { + int nextParam = url.indexOf("&", paramIndex+1); + if(nextParam != -1) { + String c = "&"; + if(wasFirstParam) c = "?"; + url = url.substring(0, paramIndex) + c + url.substring(nextParam+1, url.length()); + } else { + url = url.substring(0, paramIndex); + } + } + + return url; + } /** * Removes the current working directory from a given filename diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/GonewildRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/GonewildRipperTest.java deleted file mode 100644 index f9f748ae..00000000 --- a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/GonewildRipperTest.java +++ /dev/null @@ -1,31 +0,0 @@ -package com.rarchives.ripme.tst.ripper.rippers; - -import java.io.IOException; -import java.net.URL; -import java.util.ArrayList; -import java.util.List; - -import com.rarchives.ripme.ripper.rippers.GonewildRipper; - -public class GonewildRipperTest extends RippersTest { - - public void testInstagramAlbums() throws IOException { - if (!DOWNLOAD_CONTENT) { - return; - } - List contentURLs = new ArrayList(); - contentURLs.add(new URL("http://reddit.com/u/amle69")); - for (URL url : contentURLs) { - try { - GonewildRipper ripper = new GonewildRipper(url); - ripper.rip(); - assert(ripper.getWorkingDir().listFiles().length > 1); - deleteDir(ripper.getWorkingDir()); - } catch (Exception e) { - e.printStackTrace(); - fail("Error while ripping URL " + url + ": " + e.getMessage()); - } - } - } - -} From acde4ed63f33323c179a04c5c48f927bbbcd72fa Mon Sep 17 00:00:00 2001 From: Mads Date: Thu, 13 Mar 2014 20:14:51 +0100 Subject: [PATCH 2/5] Fixed Imgur URLs not being recognized in some cases --- .../java/com/rarchives/ripme/utils/RipUtils.java | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git 
a/src/main/java/com/rarchives/ripme/utils/RipUtils.java b/src/main/java/com/rarchives/ripme/utils/RipUtils.java index 9a055e0e..1bb8a64d 100644 --- a/src/main/java/com/rarchives/ripme/utils/RipUtils.java +++ b/src/main/java/com/rarchives/ripme/utils/RipUtils.java @@ -19,12 +19,14 @@ public class RipUtils { List result = new ArrayList(); // Imgur album - if (url.getHost().equals("imgur.com") && url.toExternalForm().contains("imgur.com/a/")) { + if ((url.getHost().equals("m.imgur.com") || url.getHost().equals("imgur.com")) + && url.toExternalForm().contains("imgur.com/a/")) { try { return ImgurRipper.getURLsFromAlbum(url); } catch (IOException e) { logger.error("[!] Exception while loading album " + url, e); } + } // Direct link to image @@ -40,6 +42,17 @@ public class RipUtils { } } + if(url.getHost().equals("imgur.com") || + url.getHost().equals("m.imgur.com")){ + try { + result.add(new URL(url.toExternalForm() + ".png")); + return result; + } catch (MalformedURLException ex) { + logger.error("[!] Exception while loading album " + url, ex); + } + + } + logger.error("[!] Unable to rip URL: " + url); return result; } From 812bf26b3c5b2182947f4dd059914281321e47c9 Mon Sep 17 00:00:00 2001 From: Mads Date: Thu, 13 Mar 2014 20:18:35 +0100 Subject: [PATCH 3/5] Fixed UI exceptions caused by modifications to the UI outside the event thread. 
--- .../ripme/ripper/AbstractRipper.java | 72 ++++----- .../com/rarchives/ripme/ui/MainWindow.java | 153 +++++++++--------- .../rarchives/ripme/ui/RipStatusHandler.java | 14 ++ 3 files changed, 127 insertions(+), 112 deletions(-) create mode 100644 src/main/java/com/rarchives/ripme/ui/RipStatusHandler.java diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java index 952f8420..cb737410 100644 --- a/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java @@ -1,5 +1,7 @@ package com.rarchives.ripme.ripper; +import com.rarchives.ripme.ui.MainWindow; +import com.rarchives.ripme.ui.RipStatusHandler; import java.io.File; import java.io.IOException; import java.lang.reflect.Constructor; @@ -17,6 +19,7 @@ import org.apache.log4j.Logger; import com.rarchives.ripme.ui.RipStatusMessage; import com.rarchives.ripme.ui.RipStatusMessage.STATUS; import com.rarchives.ripme.utils.Utils; +import java.util.Collections; public abstract class AbstractRipper extends Observable @@ -30,11 +33,11 @@ public abstract class AbstractRipper protected URL url; protected File workingDir; protected DownloadThreadPool threadPool; - protected Observer observer = null; + protected RipStatusHandler observer = null; - protected Map itemsPending = new HashMap(); - protected Map itemsCompleted = new HashMap(); - protected Map itemsErrored = new HashMap(); + protected Map itemsPending = Collections.synchronizedMap(new HashMap()); + protected Map itemsCompleted = Collections.synchronizedMap(new HashMap()); + protected Map itemsErrored = Collections.synchronizedMap(new HashMap()); protected boolean completed = true; public abstract void rip() throws IOException; @@ -59,7 +62,7 @@ public abstract class AbstractRipper this.threadPool = new DownloadThreadPool(); } - public void setObserver(Observer obs) { + public void setObserver(RipStatusHandler obs) { 
this.observer = obs; } @@ -162,7 +165,6 @@ public abstract class AbstractRipper public void retrievingSource(URL url) { RipStatusMessage msg = new RipStatusMessage(STATUS.LOADING_RESOURCE, url); observer.update(this, msg); - observer.notifyAll(); } /** @@ -179,13 +181,11 @@ public abstract class AbstractRipper try { String path = Utils.removeCWD(saveAs); RipStatusMessage msg = new RipStatusMessage(STATUS.DOWNLOAD_COMPLETE, path); - synchronized(observer) { - itemsPending.remove(url); - itemsCompleted.put(url, saveAs); - observer.update(this, msg); - observer.notifyAll(); - checkIfComplete(); - } + itemsPending.remove(url); + itemsCompleted.put(url, saveAs); + observer.update(this, msg); + + checkIfComplete(); } catch (Exception e) { logger.error("Exception while updating observer: ", e); } @@ -200,13 +200,11 @@ public abstract class AbstractRipper if (observer == null) { return; } - synchronized(observer) { - itemsPending.remove(url); - itemsErrored.put(url, reason); - observer.update(this, new RipStatusMessage(STATUS.DOWNLOAD_ERRORED, url + " : " + reason)); - observer.notifyAll(); - checkIfComplete(); - } + itemsPending.remove(url); + itemsErrored.put(url, reason); + observer.update(this, new RipStatusMessage(STATUS.DOWNLOAD_ERRORED, url + " : " + reason)); + + checkIfComplete(); } /** @@ -219,12 +217,12 @@ public abstract class AbstractRipper if (observer == null) { return; } - synchronized(observer) { - itemsPending.remove(url); - itemsErrored.put(url, message); - observer.update(this, new RipStatusMessage(STATUS.DOWNLOAD_WARN, url + " : " + message)); - observer.notifyAll(); - } + + itemsPending.remove(url); + itemsErrored.put(url, message); + observer.update(this, new RipStatusMessage(STATUS.DOWNLOAD_WARN, url + " : " + message)); + + checkIfComplete(); } @@ -235,16 +233,13 @@ public abstract class AbstractRipper if (observer == null) { return; } - synchronized (observer) { - if (!completed && itemsPending.size() == 0) { - completed = true; - logger.info(" 
Rip completed!"); - observer.update(this, - new RipStatusMessage( - STATUS.RIP_COMPLETE, - workingDir)); - observer.notifyAll(); - } + + if (!completed && itemsPending.isEmpty()) { + completed = true; + logger.info(" Rip completed!"); + + RipStatusMessage msg = new RipStatusMessage(STATUS.RIP_COMPLETE, workingDir); + observer.update(this, msg); } } @@ -325,10 +320,7 @@ public abstract class AbstractRipper if (observer == null) { return; } - synchronized (observer) { - observer.update(this, new RipStatusMessage(status, message)); - observer.notifyAll(); - } + observer.update(this, new RipStatusMessage(status, message)); } /** diff --git a/src/main/java/com/rarchives/ripme/ui/MainWindow.java b/src/main/java/com/rarchives/ripme/ui/MainWindow.java index 756d86f5..43edc92e 100644 --- a/src/main/java/com/rarchives/ripme/ui/MainWindow.java +++ b/src/main/java/com/rarchives/ripme/ui/MainWindow.java @@ -46,7 +46,7 @@ import com.rarchives.ripme.utils.Utils; /** * Everything UI-related starts and ends here. 
*/ -public class MainWindow implements Runnable { +public class MainWindow implements Runnable, RipStatusHandler { private static final Logger logger = Logger.getLogger(MainWindow.class); @@ -101,7 +101,7 @@ public class MainWindow implements Runnable { mainFrame.setVisible(true); } - public static void status(String text) { + public synchronized static void status(String text) { statusLabel.setText(text); mainFrame.pack(); } @@ -276,24 +276,14 @@ public class MainWindow implements Runnable { } private void appendLog(final String text, final Color color) { - try { - SwingUtilities.invokeAndWait(new Runnable() { - @Override - public void run() { - SimpleAttributeSet sas = new SimpleAttributeSet(); - StyleConstants.setForeground(sas, color); - StyledDocument sd = logText.getStyledDocument(); - try { - sd.insertString(sd.getLength(), text + "\n", sas); - } catch (BadLocationException e) { } - logText.setCaretPosition(logText.getText().length()); - } - }); - } catch (InterruptedException e) { - e.printStackTrace(); - } catch (InvocationTargetException e) { - e.printStackTrace(); - } + SimpleAttributeSet sas = new SimpleAttributeSet(); + StyleConstants.setForeground(sas, color); + StyledDocument sd = logText.getStyledDocument(); + try { + sd.insertString(sd.getLength(), text + "\n", sas); + } catch (BadLocationException e) { } + + logText.setCaretPosition(sd.getLength()); } private void loadHistory() { @@ -359,7 +349,7 @@ public class MainWindow implements Runnable { try { AbstractRipper ripper = AbstractRipper.getRipper(url); ripTextfield.setText(ripper.getURL().toExternalForm()); - ripper.setObserver(new RipStatusHandler()); + ripper.setObserver((RipStatusHandler) this); Thread t = new Thread(ripper); t.start(); return t; @@ -375,64 +365,83 @@ public class MainWindow implements Runnable { ripAlbum(ripTextfield.getText()); } } + + private class StatusEvent implements Runnable { + private final AbstractRipper ripper; + private final RipStatusMessage msg; - class 
RipStatusHandler implements Observer { - public void update(Observable observable, Object object) { - RipStatusMessage msg = (RipStatusMessage) object; - - int completedPercent = ((AbstractRipper) observable).getCompletionPercentage(); - statusProgress.setValue(completedPercent); - status( ((AbstractRipper)observable).getStatusText() ); - - switch(msg.getStatus()) { - case LOADING_RESOURCE: - case DOWNLOAD_STARTED: - appendLog( "Downloading: " + (String) msg.getObject(), Color.BLACK); - break; - case DOWNLOAD_COMPLETE: - appendLog( "Completed: " + (String) msg.getObject(), Color.GREEN); - break; - case DOWNLOAD_ERRORED: - appendLog( "Error: " + (String) msg.getObject(), Color.RED); - break; - - case DOWNLOAD_WARN: - appendLog( "Warn: " + (String) msg.getObject(), Color.ORANGE); - break; - - case RIP_COMPLETE: - if (!historyListModel.contains(ripTextfield.getText())) { - historyListModel.addElement(ripTextfield.getText()); - } - saveHistory(); - ripButton.setEnabled(true); - ripTextfield.setEnabled(true); - statusProgress.setValue(100); - statusLabel.setVisible(false); - openButton.setVisible(true); - File f = (File) msg.getObject(); - String prettyFile = Utils.removeCWD(f); - openButton.setText("Open " + prettyFile); - appendLog( "Rip complete, saved to " + prettyFile, Color.GREEN); - openButton.setActionCommand(f.toString()); - openButton.addActionListener(new ActionListener() { - @Override - public void actionPerformed(ActionEvent event) { - try { - Desktop.getDesktop().open(new File(event.getActionCommand())); - } catch (Exception e) { - logger.error(e); - } - } - }); - mainFrame.pack(); - } + public StatusEvent(AbstractRipper ripper, RipStatusMessage msg) { + this.ripper = ripper; + this.msg = msg; } + + public void run() { + handleEvent(this); + } + } + + private void handleEvent(StatusEvent evt) { + RipStatusMessage msg = evt.msg; + + int completedPercent = evt.ripper.getCompletionPercentage(); + statusProgress.setValue(completedPercent); + status( 
evt.ripper.getStatusText() ); + + switch(msg.getStatus()) { + case LOADING_RESOURCE: + case DOWNLOAD_STARTED: + appendLog( "Downloading: " + (String) msg.getObject(), Color.BLACK); + break; + case DOWNLOAD_COMPLETE: + appendLog( "Completed: " + (String) msg.getObject(), Color.GREEN); + break; + case DOWNLOAD_ERRORED: + appendLog( "Error: " + (String) msg.getObject(), Color.RED); + break; + + case DOWNLOAD_WARN: + appendLog( "Warn: " + (String) msg.getObject(), Color.ORANGE); + break; + + case RIP_COMPLETE: + if (!historyListModel.contains(ripTextfield.getText())) { + historyListModel.addElement(ripTextfield.getText()); + } + saveHistory(); + ripButton.setEnabled(true); + ripTextfield.setEnabled(true); + statusProgress.setValue(100); + statusLabel.setVisible(false); + openButton.setVisible(true); + File f = (File) msg.getObject(); + String prettyFile = Utils.removeCWD(f); + openButton.setText("Open " + prettyFile); + appendLog( "Rip complete, saved to " + prettyFile, Color.GREEN); + openButton.setActionCommand(f.toString()); + openButton.addActionListener(new ActionListener() { + @Override + public void actionPerformed(ActionEvent event) { + try { + Desktop.getDesktop().open(new File(event.getActionCommand())); + } catch (Exception e) { + logger.error(e); + } + } + }); + mainFrame.pack(); + } + } + + public void update(AbstractRipper ripper, RipStatusMessage message) { + StatusEvent event = new StatusEvent(ripper, message); + SwingUtilities.invokeLater(event); } /** Simple TextPane that allows horizontal scrolling. 
*/ class JTextPaneNoWrap extends JTextPane { private static final long serialVersionUID = 1L; + + @Override public boolean getScrollableTracksViewportWidth() { return false; } diff --git a/src/main/java/com/rarchives/ripme/ui/RipStatusHandler.java b/src/main/java/com/rarchives/ripme/ui/RipStatusHandler.java new file mode 100644 index 00000000..d99d039d --- /dev/null +++ b/src/main/java/com/rarchives/ripme/ui/RipStatusHandler.java @@ -0,0 +1,14 @@ + +package com.rarchives.ripme.ui; + +import com.rarchives.ripme.ripper.AbstractRipper; + +/** + * + * @author Mads + */ +public interface RipStatusHandler { + + public void update(AbstractRipper ripper, RipStatusMessage message); + +} From 95912b834385fe5fad09cfd7d144d847845d5468 Mon Sep 17 00:00:00 2001 From: Mads Date: Thu, 13 Mar 2014 20:38:11 +0100 Subject: [PATCH 4/5] small fix --- src/main/java/com/rarchives/ripme/ui/MainWindow.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/com/rarchives/ripme/ui/MainWindow.java b/src/main/java/com/rarchives/ripme/ui/MainWindow.java index 43edc92e..87b6c056 100644 --- a/src/main/java/com/rarchives/ripme/ui/MainWindow.java +++ b/src/main/java/com/rarchives/ripme/ui/MainWindow.java @@ -101,7 +101,7 @@ public class MainWindow implements Runnable, RipStatusHandler { mainFrame.setVisible(true); } - public synchronized static void status(String text) { + private void status(String text) { statusLabel.setText(text); mainFrame.pack(); } From 56cecd243d9952093391373e6712f1de370ebf6b Mon Sep 17 00:00:00 2001 From: Mads Date: Sun, 16 Mar 2014 23:30:41 +0100 Subject: [PATCH 5/5] Reverting deletion of GoneWildRipper --- .../ripme/ripper/rippers/GonewildRipper.java | 112 ++++++++++++++++++ .../ripper/rippers/GonewildRipperTest.java | 31 +++++ 2 files changed, 143 insertions(+) create mode 100644 src/main/java/com/rarchives/ripme/ripper/rippers/GonewildRipper.java create mode 100644 src/test/java/com/rarchives/ripme/tst/ripper/rippers/GonewildRipperTest.java 
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/GonewildRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/GonewildRipper.java new file mode 100644 index 00000000..e3709442 --- /dev/null +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/GonewildRipper.java @@ -0,0 +1,112 @@ +package com.rarchives.ripme.ripper.rippers; + +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.log4j.Logger; +import org.json.JSONArray; +import org.json.JSONObject; +import org.jsoup.Jsoup; + +import com.rarchives.ripme.ripper.AbstractRipper; +import com.rarchives.ripme.utils.Utils; + +public class GonewildRipper extends AbstractRipper { + + private static final String HOST = "gonewild"; + private static final Logger logger = Logger.getLogger(GonewildRipper.class); + private static final int SLEEP_TIME = 1000; + + private static String API_DOMAIN; + private String username; + + public GonewildRipper(URL url) throws IOException { + super(url); + API_DOMAIN = Utils.getConfigString("gw.api", "gonewild"); + } + + @Override + public boolean canRip(URL url) { + return getUsernameMatcher(url).matches(); + } + + private Matcher getUsernameMatcher(URL url) { + Pattern p = Pattern.compile("^https?://[a-z]{0,3}\\.?gonewild\\.com/(u|user)/([a-zA-Z0-9\\-]{3,})/?.*$"); + return p.matcher(url.toExternalForm()); + } + + @Override + public URL sanitizeURL(URL url) throws MalformedURLException { + return url; + } + + @Override + public void rip() throws IOException { + int start = 0, + count = 50; + String baseGwURL = "http://" + API_DOMAIN + ".rarchives.com/api.cgi" + + "?method=get_user" + + "&user=" + username + + "&count=" + count; + String gwURL, jsonString, imagePath; + JSONArray posts, images; + JSONObject json, post, image; + while (true) { + logger.info(" Retrieving posts by " + username); + gwURL = baseGwURL + + "&start=" + start; + start += 
count; + jsonString = Jsoup.connect(gwURL) + .ignoreContentType(true) + .execute() + .body(); + json = new JSONObject(jsonString); + if (json.has("error")) { + logger.error("Error while retrieving user posts:" + json.getString("error")); + break; + } + posts = json.getJSONArray("posts"); + if (posts.length() == 0) { + break; // No more posts to get + } + for (int i = 0; i < posts.length(); i++) { + post = (JSONObject) posts.get(i); + images = post.getJSONArray("images"); + for (int j = 0; j < images.length(); j++) { + image = (JSONObject) images.get(j); + imagePath = image.getString("path"); + if (imagePath.startsWith("..")) { + imagePath = imagePath.substring(2); + } + imagePath = "http://" + API_DOMAIN + ".rarchives.com" + imagePath; + logger.info(" Found file: " + imagePath); + addURLToDownload(new URL(imagePath)); + } + } + try { + Thread.sleep(SLEEP_TIME); + } catch (InterruptedException e) { + logger.error("[!] Interrupted while waiting to load more posts", e); + break; + } + } + waitForThreads(); + } + + @Override + public String getHost() { + return HOST; + } + + @Override + public String getGID(URL url) throws MalformedURLException { + Matcher m = getUsernameMatcher(url); + if (m.matches()) { + this.username = m.group(m.groupCount()); + } + return username; + } +} diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/GonewildRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/GonewildRipperTest.java new file mode 100644 index 00000000..db269ea1 --- /dev/null +++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/GonewildRipperTest.java @@ -0,0 +1,31 @@ +package com.rarchives.ripme.tst.ripper.rippers; + +import java.io.IOException; +import java.net.URL; +import java.util.ArrayList; +import java.util.List; + +import com.rarchives.ripme.ripper.rippers.GonewildRipper; + +public class GonewildRipperTest extends RippersTest { + + public void testInstagramAlbums() throws IOException { + if (!DOWNLOAD_CONTENT) { + return; + } + 
List contentURLs = new ArrayList(); + contentURLs.add(new URL("http://gonewild.com/u/amle69")); + for (URL url : contentURLs) { + try { + GonewildRipper ripper = new GonewildRipper(url); + ripper.rip(); + assert(ripper.getWorkingDir().listFiles().length > 1); + deleteDir(ripper.getWorkingDir()); + } catch (Exception e) { + e.printStackTrace(); + fail("Error while ripping URL " + url + ": " + e.getMessage()); + } + } + } + +}