ImgScroll/src/main/java/com/rarchives/ripme/ripper/rippers/GfycatRipper.java

package com.rarchives.ripme.ripper.rippers;


import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Utils;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

import com.rarchives.ripme.utils.Http;


/**
 * Ripper for a single gfycat.com page.
 *
 * <p>The video location is read from the {@code src} attribute of the
 * {@code source#mp4Source} element of the page. Progress is reported in bytes
 * (see {@link #useByteProgessBar()}).
 */
public class GfycatRipper extends AbstractHTMLRipper {

    private static final String HOST = "gfycat.com";

    /**
     * Matches an http/https gfycat.com URL, optionally preceded by any run of
     * the characters {@code w}, {@code m} and {@code .} (e.g. {@code www.}),
     * and captures the first alphanumeric path segment as group 1.
     * Compiled once because {@link Pattern} instances are immutable and reusable.
     */
    private static final Pattern GID_PATTERN =
            Pattern.compile("^https?://[wm.]*gfycat\\.com/([a-zA-Z0-9]+).*$");

    /** CSS selector of the element whose {@code src} attribute holds the video URL. */
    private static final String VIDEO_SELECTOR = "source#mp4Source";

    /** Path fragment removed from URLs before they are fetched. */
    private static final String DETAIL_PATH = "/gifs/detail";

    // Both counters start at 1, so the initial percentage is 100 and no
    // division by zero happens before the first update.
    private int bytesTotal = 1;
    private int bytesCompleted = 1;

    public GfycatRipper(URL url) throws IOException {
        super(url);
    }

    @Override
    public String getDomain() {
        return HOST;
    }

    @Override
    public String getHost() {
        return "gfycat";
    }

    /**
     * @param url URL to test
     * @return {@code true} when the host part of {@code url} ends with {@code gfycat.com}
     */
    @Override
    public boolean canRip(URL url) {
        return url.getHost().endsWith(HOST);
    }

    /**
     * Removes the {@code /gifs/detail} fragment from the URL.
     *
     * @param url URL to clean up
     * @return the URL without {@code /gifs/detail}
     * @throws MalformedURLException if the cleaned-up string is not a valid URL
     */
    @Override
    public URL sanitizeURL(URL url) throws MalformedURLException {
        return stripDetailPath(url);
    }

    @Override
    public Document getFirstPage() throws IOException {
        return Http.url(url).get();
    }

    @Override
    public void downloadURL(URL url, int index) {
        addURLToDownload(url, getPrefix(index));
    }

    /**
     * Extracts the gfycat id from a URL.
     *
     * @param url gfycat URL
     * @return the first alphanumeric path segment of the URL
     * @throws MalformedURLException if the URL does not have the expected gfycat.com format
     */
    @Override
    public String getGID(URL url) throws MalformedURLException {
        Matcher m = GID_PATTERN.matcher(url.toExternalForm());
        if (m.matches()) {
            return m.group(1);
        }

        throw new MalformedURLException(
                "Expected gfycat.com format:"
                        + "gfycat.com/id"
                        + " Got: " + url);
    }

    /**
     * Collects the video URL found on the page.
     *
     * @param doc parsed gfycat page
     * @return a list holding the single video URL, or an empty list when the
     *         page has no {@code source#mp4Source} element
     */
    @Override
    public List<String> getURLsFromPage(Document doc) {
        List<String> result = new ArrayList<>();
        String vidUrl = findVideoURL(doc);
        if (vidUrl == null) {
            // Elements.first() yields null for an empty selection; report the
            // problem instead of failing with a NullPointerException.
            LOGGER.warn("Could not find " + VIDEO_SELECTOR + " at " + url);
            return result;
        }
        result.add(vidUrl);
        return result;
    }

    /**
     * Helper method for retrieving video URLs.
     * @param url URL to gfycat page
     * @return URL to video
     * @throws IOException if the page cannot be fetched or holds no
     *         {@code source#mp4Source} element
     */
    public static String getVideoURL(URL url) throws IOException {
        LOGGER.info("Retrieving " + url.toExternalForm());

        //Sanitize the URL first
        url = stripDetailPath(url);

        Document doc = Http.url(url).get();
        String vidUrl = findVideoURL(doc);
        if (vidUrl == null) {
            throw new IOException("Could not find source#mp4source at " + url);
        }
        return vidUrl;
    }

    /** Returns {@code url} with the {@code /gifs/detail} fragment removed. */
    private static URL stripDetailPath(URL url) throws MalformedURLException {
        return new URL(url.toExternalForm().replace(DETAIL_PATH, ""));
    }

    /**
     * Reads the video URL from the first {@code source#mp4Source} element.
     *
     * @return the {@code src} value, with {@code http:} prepended when it is
     *         protocol-relative, or {@code null} when no such element exists
     */
    private static String findVideoURL(Document doc) {
        Elements videos = doc.select(VIDEO_SELECTOR);
        if (videos.isEmpty()) {
            return null;
        }
        String vidUrl = videos.first().attr("src");
        if (vidUrl.startsWith("//")) {
            vidUrl = "http:" + vidUrl;
        }
        return vidUrl;
    }

    /**
     * @return progress text of the form {@code "<percent>%  - <completed> / <total>"}
     */
    @Override
    public String getStatusText() {
        return String.valueOf(getCompletionPercentage()) +
                "%  - " +
                Utils.bytesToHumanReadable(bytesCompleted) +
                " / " +
                Utils.bytesToHumanReadable(bytesTotal);
    }

    /**
     * @return completed bytes as a percentage of total bytes, or 0 while the
     *         total is not a positive number
     */
    @Override
    public int getCompletionPercentage() {
        if (bytesTotal <= 0) {
            // A float division by zero gives Infinity/NaN, which would be cast
            // to Integer.MAX_VALUE or 0 rather than a meaningful percentage.
            return 0;
        }
        return (int) (100 * (bytesCompleted / (float) bytesTotal));
    }

    @Override
    public void setBytesTotal(int bytes) {
        this.bytesTotal = bytes;
    }

    @Override
    public void setBytesCompleted(int bytes) {
        this.bytesCompleted = bytes;
    }

    /** @return always {@code true}: progress for this ripper is tracked in bytes. */
    @Override
    public boolean useByteProgessBar() {
        return true;
    }
}
GfycatRipper now uses AbstractHTMLRipper 2018-06-26 08:28:14 +02:00			`package com.rarchives.ripme.ripper.rippers;`

1.0.69 - Gfycat video ripper, instagram username fix For #8 2014-06-20 10:59:01 +02:00
			`import java.io.IOException;`
			`import java.net.MalformedURLException;`
			`import java.net.URL;`
GfycatRipper now uses AbstractHTMLRipper 2018-06-26 08:28:14 +02:00			`import java.util.ArrayList;`
			`import java.util.List;`
1.0.69 - Gfycat video ripper, instagram username fix For #8 2014-06-20 10:59:01 +02:00			`import java.util.regex.Matcher;`
			`import java.util.regex.Pattern;`

GfycatRipper now uses AbstractHTMLRipper 2018-06-26 08:28:14 +02:00			`import com.rarchives.ripme.ripper.AbstractHTMLRipper;`
			`import com.rarchives.ripme.utils.Utils;`
1.0.69 - Gfycat video ripper, instagram username fix For #8 2014-06-20 10:59:01 +02:00			`import org.jsoup.nodes.Document;`
			`import org.jsoup.select.Elements;`

Using new wrapper for HTTP requests, started abstract classes to simplify rippers 2014-06-22 02:08:42 +02:00			`import com.rarchives.ripme.utils.Http;`
1.0.69 - Gfycat video ripper, instagram username fix For #8 2014-06-20 10:59:01 +02:00
GfycatRipper now uses AbstractHTMLRipper 2018-06-26 08:28:14 +02:00
			`public class GfycatRipper extends AbstractHTMLRipper {`
1.0.69 - Gfycat video ripper, instagram username fix For #8 2014-06-20 10:59:01 +02:00
			`private static final String HOST = "gfycat.com";`

			`public GfycatRipper(URL url) throws IOException {`
			`super(url);`
			`}`

GfycatRipper now uses AbstractHTMLRipper 2018-06-26 08:28:14 +02:00			`@Override`
			`public String getDomain() {`
			`return "gfycat.com";`
			`}`

1.0.69 - Gfycat video ripper, instagram username fix For #8 2014-06-20 10:59:01 +02:00			`@Override`
			`public String getHost() {`
GfycatRipper now uses AbstractHTMLRipper 2018-06-26 08:28:14 +02:00			`return "gfycat";`
1.0.69 - Gfycat video ripper, instagram username fix For #8 2014-06-20 10:59:01 +02:00			`}`

			`@Override`
			`public boolean canRip(URL url) {`
			`return url.getHost().endsWith(HOST);`
			`}`
Trim trailing whitespace, remove some unused imports. 2017-05-10 00:22:55 +02:00
1.0.69 - Gfycat video ripper, instagram username fix For #8 2014-06-20 10:59:01 +02:00			`@Override`
			`public URL sanitizeURL(URL url) throws MalformedURLException {`
Gfycat Tests & Fix for bad reddit submissions Added sanitization, and appropriate tests. Task #361 - Some gfycat doesn't work 2018-01-05 23:01:49 +01:00			`url = new URL(url.toExternalForm().replace("/gifs/detail", ""));`

1.0.69 - Gfycat video ripper, instagram username fix For #8 2014-06-20 10:59:01 +02:00			`return url;`
			`}`

GfycatRipper now uses AbstractHTMLRipper 2018-06-26 08:28:14 +02:00			`@Override`
			`public Document getFirstPage() throws IOException {`
			`return Http.url(url).get();`
			`}`

			`@Override`
			`public void downloadURL(URL url, int index) {`
			`addURLToDownload(url, getPrefix(index));`
			`}`

1.0.69 - Gfycat video ripper, instagram username fix For #8 2014-06-20 10:59:01 +02:00			`@Override`
			`public String getGID(URL url) throws MalformedURLException {`
			`Pattern p = Pattern.compile("^https?://[wm.]*gfycat\\.com/([a-zA-Z0-9]+).*$");`
			`Matcher m = p.matcher(url.toExternalForm());`
			`if (m.matches()) {`
			`return m.group(1);`
			`}`

			`throw new MalformedURLException(`
			`"Expected gfycat.com format:"`
			`+ "gfycat.com/id"`
			`+ " Got: " + url);`
			`}`

			`@Override`
GfycatRipper now uses AbstractHTMLRipper 2018-06-26 08:28:14 +02:00			`public List<String> getURLsFromPage(Document doc) {`
			`List<String> result = new ArrayList<>();`
			`Elements videos = doc.select("source#mp4Source");`
			`String vidUrl = videos.first().attr("src");`
			`if (vidUrl.startsWith("//")) {`
			`vidUrl = "http:" + vidUrl;`
			`}`
			`result.add(vidUrl);`
			`return result;`
1.0.70 Reddit ripper retrieves gfycat and vidble links As requested in #8 2014-06-25 11:03:47 +02:00			`}`

			`/**`
			`* Helper method for retrieving video URLs.`
			`* @param url URL to gfycat page`
			`* @return URL to video`
			`* @throws IOException`
			`*/`
			`public static String getVideoURL(URL url) throws IOException {`
Changed logger to LOGGER 2018-06-03 03:14:41 +02:00			`LOGGER.info("Retrieving " + url.toExternalForm());`
GfycatRipper now uses AbstractHTMLRipper 2018-06-26 08:28:14 +02:00
Gfycat Tests & Fix for bad reddit submissions Added sanitization, and appropriate tests. Task #361 - Some gfycat doesn't work 2018-01-05 23:01:49 +01:00			`//Sanitize the URL first`
			`url = new URL(url.toExternalForm().replace("/gifs/detail", ""));`
GfycatRipper now uses AbstractHTMLRipper 2018-06-26 08:28:14 +02:00
1.0.70 Reddit ripper retrieves gfycat and vidble links As requested in #8 2014-06-25 11:03:47 +02:00			`Document doc = Http.url(url).get();`
Fix gfycat, closes #269 2015-12-19 15:25:04 +01:00			`Elements videos = doc.select("source#mp4Source");`
Replaced use of Collection.size () by Collection.isEmpty () which makes the code more readable and may have more performance 2018-05-30 04:48:44 +02:00			`if (videos.isEmpty()) {`
1.0.69 - Gfycat video ripper, instagram username fix For #8 2014-06-20 10:59:01 +02:00			`throw new IOException("Could not find source#mp4source at " + url);`
			`}`
			`String vidUrl = videos.first().attr("src");`
			`if (vidUrl.startsWith("//")) {`
			`vidUrl = "http:" + vidUrl;`
			`}`
1.0.70 Reddit ripper retrieves gfycat and vidble links As requested in #8 2014-06-25 11:03:47 +02:00			`return vidUrl;`
1.0.69 - Gfycat video ripper, instagram username fix For #8 2014-06-20 10:59:01 +02:00			`}`
GfycatRipper now uses AbstractHTMLRipper 2018-06-26 08:28:14 +02:00
			`private int bytesTotal = 1;`
			`private int bytesCompleted = 1;`

			`@Override`
			`public String getStatusText() {`
			`return String.valueOf(getCompletionPercentage()) +`
			`"%  - " +`
			`Utils.bytesToHumanReadable(bytesCompleted) +`
			`" / " +`
			`Utils.bytesToHumanReadable(bytesTotal);`
			`}`

			`@Override`
			`public int getCompletionPercentage() {`
			`return (int) (100 * (bytesCompleted / (float) bytesTotal));`
			`}`

			`@Override`
			`public void setBytesTotal(int bytes) {`
			`this.bytesTotal = bytes;`
			`}`

			`@Override`
			`public void setBytesCompleted(int bytes) {`
			`this.bytesCompleted = bytes;`
			`}`

			`@Override`
			`public boolean useByteProgessBar() {return true;}`
1.0.69 - Gfycat video ripper, instagram username fix For #8 2014-06-20 10:59:01 +02:00			`}`