需要配合html2md工具 https://www.eqishare.com/technology/1236.html
需求:批量把博客文章转成md文档。
代码仅经过简单调试,能够成功运行,请根据实际需要自行优化后使用。
在pom.xml中引入依赖:
<!--引入HTML转Markdown的插件--> <dependency> <groupId>io.github.furstenheim</groupId> <artifactId>copy_down</artifactId> <version>1.0</version> </dependency>
代码:
package com.luchao.untils;

import com.alibaba.fastjson.JSONObject;
import io.github.furstenheim.CopyDown;
import io.github.furstenheim.Options;
import io.github.furstenheim.OptionsBuilder;

import java.io.*;
import java.net.URL;
import java.util.HashMap;
import java.util.regex.Pattern;

/**
 * Batch-converts blog articles to Markdown files.
 *
 * <p>Reads one article URL per line from a text file, fetches each article's HTML through the
 * {@code getUrlHtml} endpoint, converts it to Markdown locally with CopyDown, posts the Markdown
 * to the {@code getMdFile} endpoint, and downloads the file the service points to.
 *
 * @author ludeng
 * @date 2024-07-25 11:34
 */
public class test2 {

    /** Text file holding one article URL per line; replace with the real path. */
    private static final String LINKS_FILE = "D:\\alllinks.txt";

    /** Directory prefix the downloaded {@code .md} files are written under; replace as needed. */
    private static final String OUTPUT_DIR = "D:\\";

    /** Base URL of the remote conversion service. */
    private static final String SERVICE_BASE = "https://www.helloworld.net";

    /** Characters that are not allowed in Windows file names, plus control characters. */
    private static final Pattern ILLEGAL_FILE_NAME_CHARS =
            Pattern.compile("[\\\\/:*?\"<>|\\p{Cntrl}]");

    /** File name used when an article has no usable title. */
    private static final String DEFAULT_FILE_NAME = "untitled";

    /**
     * Entry point: converts every URL listed in {@link #LINKS_FILE}.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        test2 test = new test2();
        String filePath = LINKS_FILE;
        try (BufferedReader br = new BufferedReader(new FileReader(filePath))) {
            String line;
            while ((line = br.readLine()) != null) {
                // Skip blank lines and stray whitespace so no empty URL is sent to the service.
                String blogUrl = line.trim();
                if (blogUrl.isEmpty()) {
                    continue;
                }
                System.out.println(blogUrl);
                test.getmd(blogUrl);
            }
        } catch (IOException e) {
            System.out.println("读取文件时出错:" + e.getMessage());
        }
    }

    /**
     * Manual smoke test for the GET endpoint: fetches one hard-coded article and prints the raw
     * response.
     */
    private void testGet() {
        String gurl = "https://www.xxxx.com/video/1232.html";
        String url = SERVICE_BASE + "/getUrlHtml?url=" + gurl;
        try {
            String result = HttpClientUtils.get(url);
            System.out.println(result);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Converts a single article to a Markdown file.
     *
     * <p>A failure for one article is reported and swallowed so that the surrounding batch keeps
     * going with the next URL.
     *
     * @param blogUrl URL of the article to convert
     */
    private void getmd(String blogUrl) {
        try {
            // NOTE(review): blogUrl is appended without URL-encoding, as in the original; confirm
            // whether HttpClientUtils/the service expect a raw or an encoded value before changing.
            String url = SERVICE_BASE + "/getUrlHtml?url=" + blogUrl;
            String result = HttpClientUtils.get(url);
            System.out.println(result);

            JSONObject jsonObject = JSONObject.parseObject(result);
            if (jsonObject == null) {
                System.out.println("返回内容为空:" + blogUrl);
                return;
            }

            // Only a response whose "code" is "1" is processed further.
            Object code = jsonObject.get("code");
            if (code == null || !"1".equals(code.toString())) {
                System.out.println("获取文章失败:" + blogUrl);
                return;
            }

            Object html = jsonObject.get("html");
            if (html == null) {
                System.out.println("返回内容缺少html:" + blogUrl);
                return;
            }
            String markdown = htmlTansToMarkdown(html.toString());

            HashMap<String, String> map = new HashMap<>();
            map.put("md", markdown);
            map.put("url", SERVICE_BASE);
            String result1 = HttpClientUtils.post(SERVICE_BASE + "/getMdFile", map);
            System.out.println(result1);

            JSONObject jsonObject1 = JSONObject.parseObject(result1);
            Object path = jsonObject1 == null ? null : jsonObject1.get("path");
            if (path == null) {
                System.out.println("返回内容缺少path:" + blogUrl);
                return;
            }

            Object title = jsonObject.get("title");
            saveFile(path.toString(), title == null ? null : title.toString());
        } catch (IOException | RuntimeException e) {
            // Runtime failures (e.g. a malformed JSON body) must not abort the whole batch.
            System.out.println("处理文章时出错:" + blogUrl);
            e.printStackTrace();
        }
    }

    /**
     * Converts an HTML string to Markdown using CopyDown, with the {@code br} option set to
     * {@code "-"}.
     *
     * @param htmlStr HTML to convert
     * @return the Markdown text produced by CopyDown
     */
    public static String htmlTansToMarkdown(String htmlStr) {
        Options options = OptionsBuilder.anOptions()
                .withBr("-")
                // more options
                .build();
        CopyDown converter = new CopyDown(options);
        return converter.convert(htmlStr);
    }

    /**
     * Downloads a file and stores it as {@code <name>.md} under {@link #OUTPUT_DIR}.
     *
     * <p>Characters that are illegal in Windows file names are replaced with {@code _} so that
     * arbitrary article titles can be used as names; a {@code null} or empty name falls back to
     * {@value #DEFAULT_FILE_NAME}. I/O errors are reported on standard output, not thrown.
     *
     * @param urlstr  URL of the file to download
     * @param newName desired file name without extension (typically the article title)
     */
    public static void saveFile(String urlstr, String newName) {
        String safeName = sanitizeFileName(newName);
        String saveFilePath = OUTPUT_DIR + safeName + ".md";
        try (BufferedInputStream in = new BufferedInputStream(new URL(urlstr).openStream());
             FileOutputStream fileOutputStream = new FileOutputStream(saveFilePath)) {
            byte[] dataBuffer = new byte[1024];
            int bytesRead;
            while ((bytesRead = in.read(dataBuffer, 0, dataBuffer.length)) != -1) {
                fileOutputStream.write(dataBuffer, 0, bytesRead);
            }
            System.out.println("文件下载成功:" + safeName);
        } catch (IOException e) {
            System.out.println("下载文件时出错:" + e.getMessage());
        }
    }

    /** Replaces characters that cannot appear in a file name and supplies a default for empty names. */
    private static String sanitizeFileName(String name) {
        if (name == null) {
            return DEFAULT_FILE_NAME;
        }
        String cleaned = ILLEGAL_FILE_NAME_CHARS.matcher(name).replaceAll("_").trim();
        return cleaned.isEmpty() ? DEFAULT_FILE_NAME : cleaned;
    }
}