引入cpdetector解决文件编码识别问题

This commit is contained in:
chenkailing
2020-12-26 19:13:50 +08:00
committed by kl
parent f2d929e6fa
commit 342c391a9b
23 changed files with 240 additions and 400 deletions

View File

@ -5,7 +5,7 @@ import cn.keking.hutool.URLUtil;
import cn.keking.model.FileAttribute;
import cn.keking.model.FileType;
import cn.keking.model.ReturnResponse;
import cn.keking.service.FilePreviewCommonService;
import cn.keking.service.FileHandlerService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
@ -21,50 +21,48 @@ import java.util.UUID;
@Component
public class DownloadUtils {
private final Logger logger = LoggerFactory.getLogger(DownloadUtils.class);
private final static Logger logger = LoggerFactory.getLogger(DownloadUtils.class);
private final String fileDir = ConfigConstants.getFileDir();
private final FilePreviewCommonService filePreviewCommonService;
public DownloadUtils(FilePreviewCommonService filePreviewCommonService) {
this.filePreviewCommonService = filePreviewCommonService;
}
private static final String URL_PARAM_FTP_USERNAME = "ftp.username";
private static final String URL_PARAM_FTP_PASSWORD = "ftp.password";
private static final String URL_PARAM_FTP_CONTROL_ENCODING = "ftp.control.encoding";
private final FileHandlerService fileHandlerService;
public DownloadUtils(FileHandlerService fileHandlerService) {
this.fileHandlerService = fileHandlerService;
}
/**
* @param fileAttribute fileAttribute
* @param fileName 文件名
* @param fileName 文件名
* @return 本地文件绝对路径
*/
public ReturnResponse<String> downLoad(FileAttribute fileAttribute, String fileName) {
public ReturnResponse<String> downLoad(FileAttribute fileAttribute, String fileName) {
String urlStr = fileAttribute.getUrl();
String type = fileAttribute.getSuffix();
ReturnResponse<String> response = new ReturnResponse<>(0, "下载成功!!!", "");
UUID uuid = UUID.randomUUID();
if (null == fileName) {
fileName = uuid+ "."+type;
fileName = uuid + "." + type;
} else { // 文件后缀不一致时以type为准(针对simText【将类txt文件转为txt】)
fileName = fileName.replace(fileName.substring(fileName.lastIndexOf(".") + 1), type);
}
String realPath = fileDir + fileName;
File dirFile = new File(fileDir);
if (!dirFile.exists()) {
dirFile.mkdirs();
if (!dirFile.exists() && !dirFile.mkdirs()) {
logger.error("创建目录【{}】失败,可能是权限不够,请检查", fileDir);
}
try {
URL url = new URL(urlStr);
if (url.getProtocol() != null && (url.getProtocol().toLowerCase().startsWith("file")||url.getProtocol().toLowerCase().startsWith("http"))) {
if (url.getProtocol() != null && (url.getProtocol().toLowerCase().startsWith("file") || url.getProtocol().toLowerCase().startsWith("http"))) {
byte[] bytes = getBytesFromUrl(urlStr);
OutputStream os = new FileOutputStream(realPath);
saveBytesToOutStream(bytes, os);
} else if (url.getProtocol() != null && "ftp".equalsIgnoreCase(url.getProtocol())) {
String ftpUsername = filePreviewCommonService.getUrlParameterReg(fileAttribute.getUrl(), URL_PARAM_FTP_USERNAME);
String ftpPassword = filePreviewCommonService.getUrlParameterReg(fileAttribute.getUrl(), URL_PARAM_FTP_PASSWORD);
String ftpControlEncoding = filePreviewCommonService.getUrlParameterReg(fileAttribute.getUrl(), URL_PARAM_FTP_CONTROL_ENCODING);
String ftpUsername = fileHandlerService.getUrlParameterReg(fileAttribute.getUrl(), URL_PARAM_FTP_USERNAME);
String ftpPassword = fileHandlerService.getUrlParameterReg(fileAttribute.getUrl(), URL_PARAM_FTP_PASSWORD);
String ftpControlEncoding = fileHandlerService.getUrlParameterReg(fileAttribute.getUrl(), URL_PARAM_FTP_CONTROL_ENCODING);
FtpUtils.download(fileAttribute.getUrl(), realPath, ftpUsername, ftpPassword, ftpControlEncoding);
} else {
response.setCode(1);
@ -73,8 +71,8 @@ public class DownloadUtils {
}
response.setContent(realPath);
response.setMsg(fileName);
if(FileType.simText.equals(fileAttribute.getType())){
convertTextPlainFileCharsetToUtf8(realPath);
if (FileType.simText.equals(fileAttribute.getType())) {
this.convertTextPlainFileCharsetToUtf8(realPath);
}
return response;
} catch (IOException e) {
@ -92,17 +90,15 @@ public class DownloadUtils {
public byte[] getBytesFromUrl(String urlStr) throws IOException {
InputStream is = getInputStreamFromUrl(urlStr);
if (is != null) {
return getBytesFromStream(is);
} else {
if (is == null) {
urlStr = URLUtil.normalize(urlStr, true, true);
is = getInputStreamFromUrl(urlStr);
if (is == null) {
logger.error("文件下载异常url{}", urlStr);
throw new IOException("文件下载异常url" + urlStr);
}
return getBytesFromStream(is);
}
return getBytesFromStream(is);
}
public void saveBytesToOutStream(byte[] b, OutputStream os) throws IOException {
@ -127,7 +123,7 @@ public class DownloadUtils {
private byte[] getBytesFromStream(InputStream is) throws IOException {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
byte[] buffer = new byte[1024];
int len = 0;
int len;
while ((len = is.read(buffer)) != -1) {
baos.write(buffer, 0, len);
}
@ -137,41 +133,37 @@ public class DownloadUtils {
return b;
}
/**
* 转换文本文件编码为utf8
* 探测源文件编码,探测到编码切不为utf8则进行转码
* @param filePath 文件路径
*/
private static void convertTextPlainFileCharsetToUtf8(String filePath) throws IOException {
File sourceFile = new File(filePath);
if(sourceFile.exists() && sourceFile.isFile() && sourceFile.canRead()) {
String encoding = null;
try {
FileCharsetDetector.Observer observer = FileCharsetDetector.guessFileEncoding(sourceFile);
// 为准确探测到编码,不适用猜测的编
encoding = observer.isFound()?observer.getEncoding():null;
// 为准确探测到编码,可以考虑使用GBK 大部分文件都是windows系统产生的
} catch (IOException e) {
// 编码探测失败,
e.printStackTrace();
}
if(encoding != null && !"UTF-8".equals(encoding)){
// 不为utf8,进行转码
File tmpUtf8File = new File(filePath+".utf8");
Writer writer = new OutputStreamWriter(new FileOutputStream(tmpUtf8File), StandardCharsets.UTF_8);
Reader reader = new BufferedReader(new InputStreamReader(new FileInputStream(sourceFile),encoding));
char[] buf = new char[1024];
int read;
while ((read = reader.read(buf)) > 0){
writer.write(buf, 0, read);
/**
* 转换文本文件编码为utf8
* 探测源文件编码,探测到编码切不为utf8则进行转码
*
* @param filePath 文件路径
*/
private void convertTextPlainFileCharsetToUtf8(String filePath) throws IOException {
File sourceFile = new File(filePath);
if (sourceFile.exists() && sourceFile.isFile() && sourceFile.canRead()) {
String encoding = FileUtils.getFileEncode(filePath);
if (!FileUtils.DEFAULT_FILE_ENCODING.equals(encoding)) {
// 不为utf8,进行转
File tmpUtf8File = new File(filePath + ".utf8");
Writer writer = new OutputStreamWriter(new FileOutputStream(tmpUtf8File), StandardCharsets.UTF_8);
Reader reader = new BufferedReader(new InputStreamReader(new FileInputStream(sourceFile), encoding));
char[] buf = new char[1024];
int read;
while ((read = reader.read(buf)) > 0) {
writer.write(buf, 0, read);
}
reader.close();
writer.close();
// 删除源文件
if (!sourceFile.delete()) {
logger.error("源文件【{}】删除失败,请检查文件目录权限!", filePath);
}
// 重命名
if (tmpUtf8File.renameTo(sourceFile)) {
logger.error("临时文件【{}】重命名失败,请检查文件路径权限!", tmpUtf8File.getPath());
}
}
}
reader.close();
writer.close();
// 删除源文件
sourceFile.delete();
// 重命名
tmpUtf8File.renameTo(sourceFile);
}
}
}
}