需要配合html2md工具  https://www.eqishare.com/technology/1236.html

需求:批量把博客文章转成md文档。

以下代码仅经过简单调试,可以成功运行,请根据需要自行优化后使用。

在pom.xml中引入依赖(代码还用到了fastjson,以及一个需要自备的HttpClientUtils工具类,请另行引入):

<!--引入HTML转Markdown的插件-->
<dependency>
    <groupId>io.github.furstenheim</groupId>
    <artifactId>copy_down</artifactId>
    <version>1.0</version>
</dependency>

代码:

package com.luchao.untils;

import com.alibaba.fastjson.JSONObject;
import io.github.furstenheim.CopyDown;
import io.github.furstenheim.Options;
import io.github.furstenheim.OptionsBuilder;

import java.io.*;
import java.net.URL;
import java.net.URLEncoder;
import java.util.HashMap;

/**
 * html转md
 *
 * @author ludeng
 * @date 2024-07-25 11:34
 */
public class test2 {

    public static void main(String[] args) {
        test2 test = new test2();

        String filePath = "D:\\alllinks.txt"; // 替换为实际的文件路径

        try (BufferedReader br = new BufferedReader(new FileReader(filePath))) {
            String line;
            while ((line = br.readLine()) != null) {
                System.out.println(line);
                test.getmd(line);
            }
        } catch (IOException e) {
            System.out.println("读取文件时出错:" + e.getMessage());
        }

    }


    /**
     * 测试GET请求
     */
    private void testGet() {
        String gurl = "https://www.xxxx.com/video/1232.html";
        String url = "https://www.helloworld.net/getUrlHtml?url=" + gurl;
        try {
            String result = HttpClientUtils.get(url);
            System.out.println(result);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    private void getmd(String blogUrl) {
        try {
//            String gurl = "https://www.xxxx.com/video/1232.html";
            String gurl = blogUrl;
            String url = "https://www.helloworld.net/getUrlHtml?url=" + gurl;
            String result = HttpClientUtils.get(url);
            System.out.println(result);
            JSONObject jsonObject = JSONObject.parseObject(result);
            jsonObject.get("code");
            jsonObject.get("html");
            jsonObject.get("title");
            //没报错
            if (null != jsonObject.get("code") && "1".equals(jsonObject.get("code").toString())) {
                String zzhh = htmlTansToMarkdown((jsonObject.get("html").toString()));
                //            System.out.println(zzhh);
                String urlGetMd = "https://www.helloworld.net/getMdFile";
                HashMap<String, String> map = new HashMap<>();
                map.put("md", zzhh);
                map.put("url", "https://www.helloworld.net");
                String result1 = HttpClientUtils.post(urlGetMd, map);
                System.out.println(result1);
                JSONObject jsonObject1 = JSONObject.parseObject(result1);
                jsonObject1.get("path");
                saveFile(jsonObject1.get("path").toString(), jsonObject.get("title").toString());
            }
        } catch (IOException e) {
            e.printStackTrace();
        }

    }

    /**
     * 转换
     *
     * @param htmlStr
     * @return
     */
    public static String htmlTansToMarkdown(String htmlStr) {
        OptionsBuilder optionsBuilder = OptionsBuilder.anOptions();
        Options options = optionsBuilder.withBr("-")
                // more options
                .build();
        CopyDown converter = new CopyDown(options);
        String markdownText = converter.convert(htmlStr);
        return markdownText;
    }

    /**
     * 保存文件
     *
     * @param urlstr
     * @param newName
     */
    public static void saveFile(String urlstr, String newName) {
//        String fileURL = "https://www.helloworld.net/download/xxx.md"; // 替换为实际的在线文档URL
        String fileURL = urlstr; // 替换为实际的在线文档URL
        String saveFilePath = "D:\\" + newName + ".md"; // 替换为实际的保存路径

        try (BufferedInputStream in = new BufferedInputStream(new URL(fileURL).openStream());
             FileOutputStream fileOutputStream = new FileOutputStream(saveFilePath)) {
            byte dataBuffer[] = new byte[1024];
            int bytesRead;
            while ((bytesRead = in.read(dataBuffer, 0, 1024)) != -1) {
                fileOutputStream.write(dataBuffer, 0, bytesRead);
            }
            System.out.println("文件下载成功:" + newName);
        } catch (IOException e) {
            System.out.println("下载文件时出错:" + e.getMessage());
        }
    }


}