Using Zyte Smart Proxy Manager with Java and Jsoup

Modified on Wed, 5 Jan, 2022 at 1:39 PM

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Base64;
import java.util.List;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

/**
 * Reads a list of URLs from {@code ./urls.txt}, fetches each one with Jsoup through the
 * {@code proxy.zyte.com:8010} proxy, and prints the text of each page's {@code <title>}.
 *
 * <p>URLs are processed with a parallel stream, so output order is not the file order;
 * each output line is prefixed with the name of the thread that handled it.
 */
public class WebScraping {

    /** Proxy endpoint every request is routed through. */
    private static final String PROXY_HOST = "proxy.zyte.com";
    private static final int PROXY_PORT = 8010;

    /** Value passed to Jsoup's {@code timeout(int)}, which is expressed in milliseconds. */
    private static final int TIMEOUT_MILLIS = 180000;

    /**
     * Entry point: builds the proxy credentials, loads the URL list and fetches every entry.
     *
     * @param args unused
     * @throws Exception if {@code urls.txt} cannot be read
     */
    public static void main(String[] args) throws Exception {

        // "user:password" form for HTTP Basic auth; the text before the colon is the
        // API key placeholder and the part after it is left empty.
        String authString = "<CRAWLERA_APIKEY>:";
        // Encode with an explicit charset so the header does not depend on the
        // platform default encoding.
        String encodedAuthString =
            Base64.getEncoder().encodeToString(authString.getBytes(StandardCharsets.UTF_8));

        final List<String> urls = Files.readAllLines(Paths.get(".", "urls.txt"));

        urls.parallelStream()
            .map(String::trim)
            // Blank lines would make Jsoup.connect throw and abort the whole batch.
            .filter(url -> !url.isEmpty())
            .forEach(url -> printTitle(url, encodedAuthString));
    }

    /**
     * Fetches one URL through the proxy and prints its title to standard output.
     *
     * <p>Failures are reported on standard error together with the offending URL and do
     * not stop the remaining URLs from being processed.
     *
     * @param url the address to fetch
     * @param encodedAuthString Base64-encoded credentials for the {@code Proxy-Authorization} header
     */
    private static void printTitle(String url, String encodedAuthString) {
        try {
            final Document doc = Jsoup.connect(url)
                .header("Proxy-Authorization", "Basic " + encodedAuthString)
                .followRedirects(true)
                .ignoreHttpErrors(true)
                .ignoreContentType(true)
                .timeout(TIMEOUT_MILLIS)
                .proxy(PROXY_HOST, PROXY_PORT)
                .get();
            final String title = doc.select("title").text();
            System.out.println(Thread.currentThread().getName() + ": " + title);
        } catch (IOException | IllegalArgumentException e) {
            // IllegalArgumentException covers malformed entries in urls.txt; one bad
            // line should not take down the other downloads.
            System.err.println("Failed to fetch " + url + ": " + e);
        }
    }
}


Was this article helpful?

That’s Great!

Thank you for your feedback

Sorry! We couldn't be helpful

Thank you for your feedback

Let us know how we can improve this article!

Select at least one of the reasons
CAPTCHA verification is required.

Feedback sent

We appreciate your effort and will try to fix the article