ImgScroll/src/main/java/com/rarchives/ripme/ripper/rippers/TapasticRipper.java

package com.rarchives.ripme.ripper.rippers;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.json.JSONArray;
import org.json.JSONObject;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.Utils;

/**
 * Value holder for a single episode parsed from a series' episode list.
 */
class TapasticEpisode {
    /** Numeric episode id, as read from the episode list JSON. */
    final int id;
    /** Episode title after being passed through {@link Utils#filesystemSafe}. */
    final String filename;

    /**
     * @param index position of the episode in the episode list; accepted so
     *              existing callers keep compiling, but not stored
     * @param id    numeric episode id
     * @param title episode title; sanitized and stored as {@link #filename}
     */
    public TapasticEpisode(int index, int id, String title) {
        this.id = id;
        this.filename = Utils.filesystemSafe(title);
    }
}

public class TapasticRipper extends AbstractHTMLRipper {

    private List<TapasticEpisode> episodes= new ArrayList<>();

    public TapasticRipper(URL url) throws IOException {
        super(url);
    }

    @Override
    public String getDomain() {
        return "tapas.io";
    }

    @Override
    public String getHost() {
        return "tapas";
    }

    @Override
    public Document getFirstPage() throws IOException {
        return Http.url(url).get();
    }

    @Override
    public List<String> getURLsFromPage(Document page) {
        List<String> urls = new ArrayList<>();
        String html = page.data();
        if (!html.contains("episodeList : ")) {
            LOGGER.error("No 'episodeList' found at " + this.url);
            return urls;
        }
        String jsonString = Utils.between(html, "episodeList : ", ",\n").get(0);
        JSONArray json = new JSONArray(jsonString);
        for (int i = 0; i < json.length(); i++) {
            JSONObject obj = json.getJSONObject(i);
            TapasticEpisode episode = new TapasticEpisode(i, obj.getInt("id"), obj.getString("title"));
            episodes.add(episode);
            urls.add("http://tapastic.com/episode/" + episode.id);
        }
        return urls;
    }

    @Override
    public void downloadURL(URL url, int index) {
        try {
            Document doc = Http.url(url).get();
            Elements images = doc.select("article.ep-contents img");
            // Find maximum # of images for optimal filename indexing
            int epiLog = (int) (Math.floor(Math.log10(episodes.size())) + 1),
                imgLog = (int) (Math.floor(Math.log10(images.size()  )) + 1);
            for (int i = 0; i < images.size(); i++) {
                String link = images.get(i).attr("src");
                TapasticEpisode episode = episodes.get(index - 1);
                // Build elaborate filename prefix
                StringBuilder prefix = new StringBuilder();
                prefix.append(String.format("ep%0" + epiLog + "d", index));
                prefix.append(String.format("-%0" + imgLog + "dof%0" + imgLog + "d-", i + 1, images.size()));
                prefix.append(episode.filename.replace(" ", "-"));
                prefix.append("-");
                addURLToDownload(new URL(link), prefix.toString());
                if (isThisATest()) {
                    break;
                }
            }
        } catch (IOException e) {
            LOGGER.error("[!] Exception while downloading " + url, e);
        }

    }

    @Override
    public String getGID(URL url) throws MalformedURLException {
        Pattern p = Pattern.compile("^https?://tapas.io/series/([^/?]+).*$");
        Matcher m = p.matcher(url.toExternalForm());
        if (m.matches()) {
            return "series_ " + m.group(1);
        }
        p = Pattern.compile("^https?://tapas.io/episode/([^/?]+).*$");
        m = p.matcher(url.toExternalForm());
        if (m.matches()) {
            return "ep_" + m.group(1);
        }
        throw new MalformedURLException("Expected tapastic.com URL format: "
                + "tapastic.com/[series|episode]/name - got " + url + " instead");
    }
}
Added Tapastic ripper 2014-06-30 23:21:22 +02:00			`package com.rarchives.ripme.ripper.rippers;`

			`import java.io.IOException;`
			`import java.net.MalformedURLException;`
			`import java.net.URL;`
			`import java.util.ArrayList;`
			`import java.util.List;`
			`import java.util.regex.Matcher;`
			`import java.util.regex.Pattern;`

			`import org.json.JSONArray;`
			`import org.json.JSONObject;`
			`import org.jsoup.nodes.Document;`
			`import org.jsoup.select.Elements;`

			`import com.rarchives.ripme.ripper.AbstractHTMLRipper;`
			`import com.rarchives.ripme.utils.Http;`
1.0.76 - Taptastic ripper improvements New (improved) filename scheme. Shortened code length/complexity. Can now rip all episodes in series from a single /episode/ page. Code formatting the way I like it. From pull request #86. 2014-07-02 07:48:02 +02:00			`import com.rarchives.ripme.utils.Utils;`
Added Tapastic ripper 2014-06-30 23:21:22 +02:00
			`class TapasticEpisode {`
Update to Java 8 * Changed the Maven target to 1.8 * Performed a preliminary cleanup using IntelliJ's Code Analysis (Only Java 7/8 updates and a few other entries in the Error and Warnings categories) * Updated the readme to change the required Java version 2017-10-24 16:33:28 +02:00			`int id;`
			`String filename;`
Added Tapastic ripper 2014-06-30 23:21:22 +02:00			`public TapasticEpisode(int index, int id, String title) {`
Update to Java 8 * Changed the Maven target to 1.8 * Performed a preliminary cleanup using IntelliJ's Code Analysis (Only Java 7/8 updates and a few other entries in the Error and Warnings categories) * Updated the readme to change the required Java version 2017-10-24 16:33:28 +02:00			`int index1 = index;`
1.0.76 - Taptastic ripper improvements New (improved) filename scheme. Shortened code length/complexity. Can now rip all episodes in series from a single /episode/ page. Code formatting the way I like it. From pull request #86. 2014-07-02 07:48:02 +02:00			`this.id = id;`
Update to Java 8 * Changed the Maven target to 1.8 * Performed a preliminary cleanup using IntelliJ's Code Analysis (Only Java 7/8 updates and a few other entries in the Error and Warnings categories) * Updated the readme to change the required Java version 2017-10-24 16:33:28 +02:00			`String title1 = title;`
1.0.76 - Taptastic ripper improvements New (improved) filename scheme. Shortened code length/complexity. Can now rip all episodes in series from a single /episode/ page. Code formatting the way I like it. From pull request #86. 2014-07-02 07:48:02 +02:00			`this.filename = Utils.filesystemSafe(title);`
Added Tapastic ripper 2014-06-30 23:21:22 +02:00			`}`
			`}`

			`public class TapasticRipper extends AbstractHTMLRipper {`

Update to Java 8 * Changed the Maven target to 1.8 * Performed a preliminary cleanup using IntelliJ's Code Analysis (Only Java 7/8 updates and a few other entries in the Error and Warnings categories) * Updated the readme to change the required Java version 2017-10-24 16:33:28 +02:00			`private List<TapasticEpisode> episodes= new ArrayList<>();`
Added Tapastic ripper 2014-06-30 23:21:22 +02:00
			`public TapasticRipper(URL url) throws IOException {`
			`super(url);`
			`}`

			`@Override`
			`public String getDomain() {`
Fixed ripper for tapas.io 2017-11-23 06:51:37 +01:00			`return "tapas.io";`
Added Tapastic ripper 2014-06-30 23:21:22 +02:00			`}`

			`@Override`
			`public String getHost() {`
Fixed ripper for tapas.io 2017-11-23 06:51:37 +01:00			`return "tapas";`
Added Tapastic ripper 2014-06-30 23:21:22 +02:00			`}`

			`@Override`
			`public Document getFirstPage() throws IOException {`
			`return Http.url(url).get();`
			`}`

			`@Override`
			`public List<String> getURLsFromPage(Document page) {`
Update to Java 8 * Changed the Maven target to 1.8 * Performed a preliminary cleanup using IntelliJ's Code Analysis (Only Java 7/8 updates and a few other entries in the Error and Warnings categories) * Updated the readme to change the required Java version 2017-10-24 16:33:28 +02:00			`List<String> urls = new ArrayList<>();`
1.0.76 - Taptastic ripper improvements New (improved) filename scheme. Shortened code length/complexity. Can now rip all episodes in series from a single /episode/ page. Code formatting the way I like it. From pull request #86. 2014-07-02 07:48:02 +02:00			`String html = page.data();`
			`if (!html.contains("episodeList : ")) {`
Changed logger to LOGGER 2018-06-03 03:14:41 +02:00			`LOGGER.error("No 'episodeList' found at " + this.url);`
1.0.76 - Taptastic ripper improvements New (improved) filename scheme. Shortened code length/complexity. Can now rip all episodes in series from a single /episode/ page. Code formatting the way I like it. From pull request #86. 2014-07-02 07:48:02 +02:00			`return urls;`
			`}`
			`String jsonString = Utils.between(html, "episodeList : ", ",\n").get(0);`
			`JSONArray json = new JSONArray(jsonString);`
			`for (int i = 0; i < json.length(); i++) {`
			`JSONObject obj = json.getJSONObject(i);`
			`TapasticEpisode episode = new TapasticEpisode(i, obj.getInt("id"), obj.getString("title"));`
			`episodes.add(episode);`
			`urls.add("http://tapastic.com/episode/" + episode.id);`
Added Tapastic ripper 2014-06-30 23:21:22 +02:00			`}`
			`return urls;`
			`}`

			`@Override`
			`public void downloadURL(URL url, int index) {`
			`try {`
1.0.76 - Taptastic ripper improvements New (improved) filename scheme. Shortened code length/complexity. Can now rip all episodes in series from a single /episode/ page. Code formatting the way I like it. From pull request #86. 2014-07-02 07:48:02 +02:00			`Document doc = Http.url(url).get();`
			`Elements images = doc.select("article.ep-contents img");`
			`// Find maximum # of images for optimal filename indexing`
			`int epiLog = (int) (Math.floor(Math.log10(episodes.size())) + 1),`
			`imgLog = (int) (Math.floor(Math.log10(images.size() )) + 1);`
			`for (int i = 0; i < images.size(); i++) {`
			`String link = images.get(i).attr("src");`
			`TapasticEpisode episode = episodes.get(index - 1);`
			`// Build elaborate filename prefix`
			`StringBuilder prefix = new StringBuilder();`
			`prefix.append(String.format("ep%0" + epiLog + "d", index));`
			`prefix.append(String.format("-%0" + imgLog + "dof%0" + imgLog + "d-", i + 1, images.size()));`
			`prefix.append(episode.filename.replace(" ", "-"));`
			`prefix.append("-");`
			`addURLToDownload(new URL(link), prefix.toString());`
Various fixes to tests: Ability to set log level, lots of debugging messages Turn on debug logging during tests, simplified test cases for HTML ripper Fix fusktator ripper, added test Fixed gifyo, added test Added tests for all rippers Adding a few album-guessing URLs 2015-02-10 08:29:29 +01:00			`if (isThisATest()) {`
			`break;`
			`}`
1.0.76 - Taptastic ripper improvements New (improved) filename scheme. Shortened code length/complexity. Can now rip all episodes in series from a single /episode/ page. Code formatting the way I like it. From pull request #86. 2014-07-02 07:48:02 +02:00			`}`
Added Tapastic ripper 2014-06-30 23:21:22 +02:00			`} catch (IOException e) {`
Changed logger to LOGGER 2018-06-03 03:14:41 +02:00			`LOGGER.error("[!] Exception while downloading " + url, e);`
Added Tapastic ripper 2014-06-30 23:21:22 +02:00			`}`

			`}`

			`@Override`
			`public String getGID(URL url) throws MalformedURLException {`
Fixed ripper for tapas.io 2017-11-23 06:51:37 +01:00			`Pattern p = Pattern.compile("^https?://tapas.io/series/([^/?]+).*$");`
Added Tapastic ripper 2014-06-30 23:21:22 +02:00			`Matcher m = p.matcher(url.toExternalForm());`
			`if (m.matches()) {`
1.0.76 - Taptastic ripper improvements New (improved) filename scheme. Shortened code length/complexity. Can now rip all episodes in series from a single /episode/ page. Code formatting the way I like it. From pull request #86. 2014-07-02 07:48:02 +02:00			`return "series_ " + m.group(1);`
			`}`
Fixed ripper for tapas.io 2017-11-23 06:51:37 +01:00			`p = Pattern.compile("^https?://tapas.io/episode/([^/?]+).*$");`
1.0.76 - Taptastic ripper improvements New (improved) filename scheme. Shortened code length/complexity. Can now rip all episodes in series from a single /episode/ page. Code formatting the way I like it. From pull request #86. 2014-07-02 07:48:02 +02:00			`m = p.matcher(url.toExternalForm());`
			`if (m.matches()) {`
			`return "ep_" + m.group(1);`
Added Tapastic ripper 2014-06-30 23:21:22 +02:00			`}`
			`throw new MalformedURLException("Expected tapastic.com URL format: "`
1.0.76 - Taptastic ripper improvements New (improved) filename scheme. Shortened code length/complexity. Can now rip all episodes in series from a single /episode/ page. Code formatting the way I like it. From pull request #86. 2014-07-02 07:48:02 +02:00			`+ "tapastic.com/[series\|episode]/name - got " + url + " instead");`
Added Tapastic ripper 2014-06-30 23:21:22 +02:00			`}`
			`}`