��HttpClient��ͼƬ-��ƿ��

��HttpClient��ͼƬ

2024-04-23 13

��Ȩ

��Ȩ��

��ɰ��ʵ��ע��û��Է��ף��Ȩ��ԭ��У��ƿ��ӵ��Ȩ��಻�е��Ӧ��Ρ��鿴�� ƿ��û��Э�� ƿ��֪ʶ��Ȩ��ָ��ֱ��ӳ�Ϯ��ݣ��д ��ȨͶ�߱��оٱ��һ��ʵ��ɾ��Ȩ��ݡ�

��漰�Ĳ�Ʒ

ʵʱ�� Flink �棬5000CU*H 3��

�� Elasticsearch �棬2��4GB��߹�� 1��

��ݿ��ƽ̨ DataWorks��ʱ��

��飺 ��HttpClient��ͼƬ

2000Ԫ��ƴ��ȯ��ȡ��2��4G�Ʒ��664Ԫ/3�꣬��û��Żݣ��>>>

��Ʋɹ��223Ԫ/3�꣩���ڣ��>>>��

��ѧ��9.5Ԫ/�£��ڣ��>>>��

ǰ��
��漼��Ϊ��ݻ�ȡ��Ҫ��ߣ��ڸ��и�ҵ��Ź㷺��Ӧ�á��ڱ��У��ǽ��Java�е�HttpClient�⣬ͨ��дһ��򵥶��Ч��ʵ��վ��ͼƬ�Ĺ��ܡ�ͨ��ӣ��ǲ��ѧϰ��HttpClient��󣬻��̽��Ļ��ԭ��ʵ�ַ��
��󳡾�
��ڿ��һ��Ƽ�Ӧ�ã��Ҫ��վ�ϻ�ȡͼƬ��ḻ�û��顣Ϊ��ʵ��Ҫ��дһ��Զ��վ�ϵ�ͼƬ��浽��ļ�ϵͳ�С�
Ŀ��
��ǵ��ҪĿ��Ǳ�дһ��ܹ��Զ��վͼƬ�ĳ��Ϊ��ʵ��Ŀ�꣬��Ҫ��¼��ؼ��⣺

��η��HTTP��󲢻�ȡ��ҳ��ݣ�
��δ��ҳ��ȡ��ͼƬ��URL��
��HttpClient��ͼƬ��أ�
��ȡ��
��ȡ��
��ʵ��ȡ��ͼƬ�Ĺ��У��ǿ��ܻ��¼��⣺
��ƣ��վ��ܻ��÷��ֹ��ķ��ʣ��Ҫ��ȡһЩ��ʩ��Щ��ƣ��ú��ʵ��ͷ��Ϣ��
ͼƬURL��ȡ��վ�ϵ�ͼƬ��ֲܷ��ڲ�ͬ��ҳ��ϣ��Ҫ��ҳ�ṹ��ҵ�ͼƬ��ڵ�λ�ã��ȡ��ͼƬ��URL��
��ȡ��
��ȡ��ͼƬ�Ĺ��̣�
��HTTP��ʹ��HttpClient�ⷢ��һ��GET��ȡ��վ��HTMLҳ�档
��HTML��HTML��Jsoup��ǽ��HTMLҳ�棬��ȡ��е�ͼƬURL��
��ͼƬURL��ȡ��ͼƬURL��ɸѡ�͹��ˣ�ֻ��ͼƬ��ӡ�
��ͼƬ��HttpClient�ⷢ��HTTP��󣬽�ͼƬ��ص��ļ�ϵͳ�С�
ʵ�ִ��
��Java��д��ʵ�ִ��ʾ��
```java
import org.apache.http.HttpEntity;
import org.apache.http.HttpHost;
import org.apache.http.HttpResponse;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.client.CredentialsProvider;
import org.apache.http.client.HttpClient;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.BasicCredentialsProvider;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.Closeable;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;

/**
 * Fetches a web page, collects the URLs of its {@code <img>} elements and saves each image
 * into the local {@code images/} directory. Every request is sent through an authenticated
 * HTTP proxy built by {@link #createHttpClientWithProxy()}.
 */
public class ImageDownloader {

    /** Directory, relative to the working directory, that downloaded images are written to. */
    private static final String OUTPUT_DIR = "images";

    // NOTE(review): the proxy endpoint and credentials are hard-coded in source. They are kept
    // here so behaviour is unchanged, but they should be loaded from configuration or the
    // environment before this code is used outside a demo.
    private static final String PROXY_HOST = "www.16yun.cn";
    private static final int PROXY_PORT = 5445;
    private static final String PROXY_USER = "16QMSOML";
    private static final String PROXY_PASSWORD = "280651";

    /**
     * Entry point: scrapes the image URLs from the start page and downloads all of them.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String url = "https://www.mafengwo.cn/";
        List<String> imageUrls = getImageUrls(url);
        downloadImages(imageUrls);
    }

    /**
     * Downloads the page at {@code url} and returns the absolute URL of every {@code <img>}
     * element that has a resolvable {@code src} attribute.
     *
     * <p>I/O failures and non-2xx responses are reported on {@code System.err} and result in
     * an empty (or partial) list rather than an exception.
     *
     * @param url address of the HTML page to scan
     * @return the image URLs found, in document order; never {@code null}
     */
    public static List<String> getImageUrls(String url) {
        List<String> imageUrls = new ArrayList<>();
        HttpClient httpClient = createHttpClientWithProxy();
        try {
            HttpResponse response = httpClient.execute(new HttpGet(url));
            HttpEntity entity = response.getEntity();
            int status = response.getStatusLine().getStatusCode();
            if (status < 200 || status >= 300 || entity == null) {
                // Drain the body so the connection is released, then give up on this page.
                EntityUtils.consumeQuietly(entity);
                System.err.println("Unexpected response (HTTP " + status + ") for " + url);
                return imageUrls;
            }

            // UTF-8 is only the fallback; a charset declared in Content-Type still wins.
            String html = EntityUtils.toString(entity, StandardCharsets.UTF_8);

            // The page URL is passed as base URI so that absUrl() can resolve relative
            // src values; without it every relative src resolves to an empty string.
            Document doc = Jsoup.parse(html, url);
            Elements imgElements = doc.getElementsByTag("img");
            for (Element imgElement : imgElements) {
                String imgUrl = imgElement.absUrl("src");
                if (!imgUrl.isEmpty()) {
                    imageUrls.add(imgUrl);
                }
            }
        } catch (IOException e) {
            System.err.println("Failed to fetch image list from " + url);
            e.printStackTrace();
        } finally {
            closeQuietly(httpClient);
        }
        return imageUrls;
    }

    /**
     * Downloads every URL in {@code imageUrls} into the {@code images/} directory, creating
     * the directory if necessary. A failure on one image is reported on {@code System.err}
     * and does not stop the remaining downloads.
     *
     * @param imageUrls absolute image URLs; {@code null} or empty means nothing to do
     */
    public static void downloadImages(List<String> imageUrls) {
        if (imageUrls == null || imageUrls.isEmpty()) {
            return;
        }

        File outputDir = new File(OUTPUT_DIR);
        if (!outputDir.isDirectory() && !outputDir.mkdirs()) {
            System.err.println("Cannot create output directory: " + outputDir.getAbsolutePath());
            return;
        }

        // One client is shared by all downloads instead of building (and leaking) one per image.
        HttpClient httpClient = createHttpClientWithProxy();
        try {
            int index = 0;
            for (String imageUrl : imageUrls) {
                index++;
                if (imageUrl == null || imageUrl.isEmpty()) {
                    continue;
                }
                try {
                    File target = new File(outputDir, fileNameFor(imageUrl, index));
                    downloadImage(httpClient, imageUrl, target);
                } catch (IOException | IllegalArgumentException e) {
                    // IllegalArgumentException: HttpGet rejects syntactically invalid URLs.
                    System.err.println("Failed to download " + imageUrl);
                    e.printStackTrace();
                }
            }
        } finally {
            closeQuietly(httpClient);
        }
    }

    /**
     * Builds an HTTP client that routes every request through the configured proxy and
     * answers the proxy's authentication challenge with the configured user name/password.
     *
     * @return a new client; callers are responsible for closing it when it is
     *         {@link Closeable}
     */
    public static HttpClient createHttpClientWithProxy() {
        CredentialsProvider credsProvider = new BasicCredentialsProvider();
        credsProvider.setCredentials(
                new AuthScope(PROXY_HOST, PROXY_PORT),
                new UsernamePasswordCredentials(PROXY_USER, PROXY_PASSWORD));

        HttpHost proxy = new HttpHost(PROXY_HOST, PROXY_PORT);
        RequestConfig requestConfig = RequestConfig.custom()
                .setProxy(proxy)
                .build();

        return HttpClients.custom()
                .setDefaultCredentialsProvider(credsProvider)
                .setDefaultRequestConfig(requestConfig)
                .build();
    }

    /** Fetches one image and streams its body to {@code target}, overwriting any existing file. */
    private static void downloadImage(HttpClient httpClient, String imageUrl, File target)
            throws IOException {
        HttpResponse response = httpClient.execute(new HttpGet(imageUrl));
        HttpEntity entity = response.getEntity();
        int status = response.getStatusLine().getStatusCode();
        if (status < 200 || status >= 300 || entity == null) {
            // Do not save an error page as if it were an image.
            EntityUtils.consumeQuietly(entity);
            throw new IOException("Unexpected response (HTTP " + status + ") for " + imageUrl);
        }

        // try-with-resources closes both streams on every path, including write failures.
        try (InputStream inputStream = entity.getContent();
             OutputStream outputStream = new FileOutputStream(target)) {
            byte[] buffer = new byte[8192];
            int bytesRead;
            while ((bytesRead = inputStream.read(buffer)) != -1) {
                outputStream.write(buffer, 0, bytesRead);
            }
        }
    }

    /**
     * Derives a local file name from the last path segment of {@code imageUrl}, dropping any
     * query string or fragment and falling back to {@code image_<index>} when nothing usable
     * remains.
     */
    private static String fileNameFor(String imageUrl, int index) {
        String path = imageUrl;
        int queryStart = path.indexOf('?');
        if (queryStart >= 0) {
            path = path.substring(0, queryStart);
        }
        int fragmentStart = path.indexOf('#');
        if (fragmentStart >= 0) {
            path = path.substring(0, fragmentStart);
        }

        // Replace characters that are not allowed in file names on common file systems.
        String name = path.substring(path.lastIndexOf('/') + 1)
                .replaceAll("[\\\\:*?\"<>|]", "_");
        if (name.isEmpty() || name.equals(".") || name.equals("..")) {
            name = "image_" + index;
        }
        return name;
    }

    /** Closes the client if its implementation is {@link Closeable}; close errors are ignored. */
    private static void closeQuietly(HttpClient httpClient) {
        if (httpClient instanceof Closeable) {
            try {
                ((Closeable) httpClient).close();
            } catch (IOException ignored) {
                // Nothing useful can be done if shutting the client down fails.
            }
        }
    }

}
```
��һ��Ż�
��Ȼ��Ĵ��ʵ�ּ򵥵�ͼƬ��ع��ܣ��ʵ��Ӧ��У��ǿ��ܻ��Ҫ��һЩ�Ż��͸Ľ��Ч�ʺͳ��׳�ԡ��һЩ��ܵ��Ż��
��߳��أ��ʹ�ö��̼߳��ٶȣ�ͬʱ��̡߳�
�쳣��п��ܳ��ֵ��쳣��ǿ��Ľ�׳�ԡ�
��ӳع��ʹ��ӳع��HTTP��ӣ��Ӵ��ٵĿ��ܡ�
�ϵ��֧�ֶϵ��ܣ��ж�ʱ��Դ��ϴ��жϵ�λ�ü��أ��ʡ��Դ��