diff --git a/app/lea/Util.go b/app/lea/Util.go index b8a2b21..f366762 100644 --- a/app/lea/Util.go +++ b/app/lea/Util.go @@ -11,6 +11,8 @@ import ( "gopkg.in/mgo.v2/bson" "time" "strings" + "github.com/PuerkitoBio/goquery" + "bytes" math_rand "math/rand" ) @@ -140,73 +142,48 @@ func ReplaceAll(oldStr, pattern, newStr string) string { } // 获取纯文本 -func SubStringHTMLToRaw(param string, length int) (result string) { +func SubStringHTMLToRaw(param string, length int) string { if param == "" { - return "" + return param } - result = "" n := 0 var temp rune // 中文问题, 用rune来解决 rStr := []rune(param) + lenStr := len(rStr) isCode := false - for i := 0; i < len(rStr); i++ { + + resultRune := make([]rune, length) + // s := "" + for i := 0; i < lenStr; i++ { temp = rStr[i] if temp == '<' { isCode = true continue } else if temp == '>' { isCode = false - result += " "; // 空格 + resultRune[n] = ' '; + + n++ + if n >= length { + break + } continue } if !isCode { - result += string(temp) + resultRune[n] = temp; + // s += string(temp) n++ if n >= length { break } } } - return + result := string(resultRune[0:n]) + return strings.Trim(result, " ") } -// 获取摘要, HTML -func SubStringHTML(param string, length int, end string) string { - if param == "" { - return "" - } - - // 先取出
占位.. - result := "" - - // 1 - n := 0 - var temp rune // 中文问题, 用rune来解决 - isCode := false //是不是HTML代码 - isHTML := false //是不是HTML特殊字符,如 - rStr := []rune(param) - for i := 0; i < len(rStr); i++ { - temp = rStr[i] - if temp == '<' { - isCode = true - } else if temp == '&' { - isHTML = true - } else if temp == '>' && isCode { - n = n - 1 - isCode = false - } else if temp == ';' && isHTML { - isHTML = false - } - if !isCode && !isHTML { - n = n + 1 - } - result += string(temp) - if n >= length { - break - } - } - result += end - +// 自带方法补全html +func fixHtml(result string) string { // 取出所有标签 tempResult := ReplaceAll(result, "(>)[^<>]*()", "$1$2") // 把标签中间的所有内容都去掉了 @@ -249,6 +226,71 @@ func SubStringHTML(param string, length int, end string) string { return result } +// 获取摘要, HTML +func SubStringHTML(param string, length int, end string) string { + if param == "" { + return param + } + result := "" + + rStr := []rune(param) + lenStr := len(rStr) + + if lenStr <= length { + result = param + } else { + // 1 + n := 0 + var temp rune // 中文问题, 用rune来解决 + isCode := false //是不是HTML代码 + isHTML := false //是不是HTML特殊字符,如 + var i = 0; + for ; i < lenStr; i++ { + temp = rStr[i] + if temp == '<' { + isCode = true + } else if temp == '&' { + isHTML = true + } else if temp == '>' && isCode { + // n = n - 1 + isCode = false + } else if temp == ';' && isHTML { + isHTML = false + } + if !isCode && !isHTML { + n = n + 1 + } + // 每一次都相加, 速度非常慢!, 重新分配内存, 7倍的差距 + // result += string(temp) + if n >= length { + break + } + } + + result = string(rStr[0:i]) + + if end != "" { + result += end + } + } + + // 使用goquery来取出html, 为了补全html + htmlReader := bytes.NewBufferString(result) + dom, err1 := goquery.NewDocumentFromReader(htmlReader) + if err1 == nil { + html, _ := dom.Html() + html = strings.Replace(html, "", "", 1) + html = strings.Replace(html, "", "", 1) + + // TODO 把style="float: left"去掉 + return html + + // 如果有错误, 则使用自己的方法补全, 有风险 + } else { + return fixHtml(result) + } +} + // 是否是合格的密码 func IsGoodPwd(pwd string) (bool, string) { if pwd == "" {