Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

允许更多的附件文件名称规则 #43

Merged
merged 2 commits into from
Jul 12, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 45 additions & 31 deletions chrome/content/scripts/jasminum.js
Original file line number Diff line number Diff line change
Expand Up @@ -186,41 +186,55 @@ Zotero.Jasminum = {
/**
 * Derive search parameters (an author and a title keyword) from an
 * attachment filename, guided by the user's "jasminum.namepatent"
 * filename-pattern preference.
 *
 * NOTE(review): this text appears to be a rendered diff in which removed
 * (underscore-splitting) and added (regex-based) lines are interleaved
 * without +/- markers; the braces do not balance as shown. Confirm every
 * statement against the real file before relying on these notes.
 *
 * @param {string} filename - attachment filename; presumably includes a
 *     3-character extension (the last 4 characters are dropped) — TODO confirm.
 * @returns {{author: string, keyword: string}} the extracted author and the
 *     title text to search with.
 */
splitFilename: function (filename) {
// Make query parameters from filename
var patent = Zotero.Prefs.get("jasminum.namepatent");
var patentArr = patent.split("_");
// Literal separator text found between the {%x} placeholders of the pattern.
var patentSepArr = patent.split(/{%[^}]+}/);
// Backslash-escape regex metacharacters so separators are matched literally.
var patentSepRegArr = patentSepArr.map(x => x.replace(/([\[\\\^\$\.\|\?\*\+\(\)])/g,'\\$&'));
// The placeholders themselves, in order of appearance.
// NOTE(review): String.prototype.match with /g returns null when nothing
// matches, so a pattern without any placeholder would make the .map below throw.
var patentMainArr = patent.match(/{%[^}]+}/g);
// The author field in the filename must not contain an underscore; separate
// multiple authors with characters such as "&" or a comma, or use only the
// first author's name (with or without a trailing "等", i.e. "et al.").
// Each placeholder becomes one capture group: {%y} -> digits, {%a} -> any
// run without "_", anything else -> greedy ".+".
var patentMainRegArr = patentMainArr.map(x => x.replace(/.+/,/{%y}/.test(x)?'(\\d+)':(/{%a}/.test(x)?'([^_]+)':'(.+)')));
// Pair each separator with the capture group that follows it, then flatten,
// drop empty/undefined pieces and join into a single regex source string.
var regStrInterArr = patentSepRegArr.map((_,i)=>[patentSepRegArr[i],patentMainRegArr[i]]);
var patentReg = new RegExp([].concat.apply([],regStrInterArr).filter(Boolean).join(''),'g');
// Drop the last 4 characters (presumably ".pdf" or a similar extension).
var prefix = filename.substr(0, filename.length - 4);
var prefix = prefix.replace(/\.ashx$/g, ""); // Strip a trailing ".ashx"
var author = "";
var title = "";
if (prefix.includes("_")) {
// Filename contains underscores
// Remove year string
if (patent.includes("{%y}")) {
patentArr.splice(patentArr.indexOf("{%y}"), 1);
prefix = prefix.replace(/[0-9]{4}[\._]/g, "");
}
// Trim one leading/trailing underscore, then split into fields.
var prefixArr = prefix.replace(/^_|_$/g, "").split("_");
console.log(patentArr);
console.log(prefixArr);
if (patentArr.includes("{%g}")) {
// Author is taken from the first field when {%g} leads the pattern,
// otherwise from the last field.
var authorIdx = patentArr.indexOf("{%g}");
var authorIdx = authorIdx === 0 ? 0 : prefixArr.length - 1;
console.log(authorIdx);
author = prefixArr[authorIdx];
prefixArr.splice(authorIdx, 1);
var missIndex = prefixArr.indexOf("省略");
if (missIndex > 0) {
prefixArr.splice(missIndex - 1, 3); // Delete before and after 省略 (the "omitted" marker)
}
title = prefixArr.join(" ");
} else {
title = prefixArr.join(" ");
}
} else {
// No underscore: treat the whole filename as the title
title = prefix;
// NOTE(review): exec returns null when the filename does not fit the
// pattern; the indexing below would then throw.
var prefixMainArr = patentReg.exec(prefix);
// Capture group k+1 corresponds to the k-th placeholder in the pattern.
var titleIdx = patentMainArr.indexOf('{%t}');
var authorIdx = patentMainArr.indexOf('{%a}');
var titleRaw = (titleIdx!=-1)?prefixMainArr[titleIdx+1]:'';
var authors = (authorIdx!=-1)?prefixMainArr[authorIdx+1]:'';
// Keep only the first of several comma- or ampersand-separated authors.
var authorArr = authors.split(/[,,&]/);
var author = authorArr[0]
if (authorArr.length == 1){
// Remove a trailing "等" ("et al.") that may follow the name. This cannot
// tell whether the character is actually part of the author's name.
// Known problem: if the name really ends with "等" (e.g. "刘等"), it is removed by mistake.
// The length check keeps at least two characters after removal, to reduce such mistakes.

author = (author.endsWith('等')&&author.length>2)?author.substr(0, author.length - 1):author;
}

// Avoid search failures caused by two situations in the title field of the filename:
// the original title is too long, so the filename contains "_省略_" ("_omitted_");
// the original title has special symbols (Greek letters, sub/superscripts) that were altered, which also leaves "_" in the title.
// Therefore only the longest "_"-separated part of the title is used as the search title.

// Known problem: if that longest part contains text rewritten by CNKI, the search may still fail.
// This is only a theoretical case and has not been observed so far.

var title;

if (/_/.test(titleRaw)){

// getLongestText returns the longest string in an array of strings
// (the empty string for an empty array).
// Taken from https://stackoverflow.com/a/59935726
const getLongestText = (arr) => arr.reduce(
(savedText, text) => (text.length > savedText.length ? text : savedText),
'',
);
title = getLongestText(titleRaw.split(/_/));
}else{
title = titleRaw;
}

// NOTE(review): `author` appears twice below (old and new diff lines); in an
// object literal the later key wins.
return {
author: author.replace(",", ""),
author: author,
keyword: title,
};
},
Expand Down
2 changes: 1 addition & 1 deletion chrome/locale/en-US/overlay.dtd
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
<!ENTITY jasminum.pdftk.info "Jasminum use PDFtk server to add bookmarks. Please install PDFtk server properly and choose folder contains PDFtk server execute file">
<!ENTITY pdftk.path.check.ok "PDFtk server path is OK.">
<!ENTITY pdftk.path.check.error "Can't find PDFtk execute file in this folder.">
<!ENTITY jasminum.namepatent.desc "Filename format:{&#37;g}=Chinese fullname,{&#37;t}=title, {&#37;y}=year, seperate by '_',ignore file extension">
<!ENTITY jasminum.namepatent.desc "Filename format:{&#37;t}=title,{&#37;a}=author, {&#37;y}=year,{&#37;j}=other info(such as source), no limit on delimiters,ignore file extension">
<!ENTITY jasminum.zhnamesplit.label "Split firstname and lastname when get metadata(Including CNKI web translators)">
<!ENTITY jasminum.rename.label "Use Zotfile to rename attachment(Need Zotfile installed)">
<!ENTITY jasminum.autobookmark.label "Add bookmark to thesis PDF when adding thesis item from CNKI">
Expand Down
2 changes: 1 addition & 1 deletion chrome/locale/zh-CN/overlay.dtd
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
<!ENTITY jasminum.pdftk.info "添加书签功能依赖于PDFtk server,正确安装并设置好安装目录才能实现书签添加功能,请选择PDFtk可执行文件所在目录">
<!ENTITY pdftk.path.check.ok "PDFtk Server安装目录检查通过">
<!ENTITY pdftk.path.check.error "未在该目录下发现pdftk执行程序">
<!ENTITY jasminum.namepatent.desc "根据文件名抓取知网元数据,文件名格式设置:{&#37;g}=全名,{&#37;t}=标题名,{&#37;y}=年份,以下划线_分隔,不用考虑文件后缀名">
<!ENTITY jasminum.namepatent.desc "根据文件名抓取知网元数据,文件名格式设置:{&#37;t}=标题,{&#37;a}=作者,{&#37;y}=年份,{&#37;j}=其他(例如来源信息);分隔符依实情指定,可连续使用多个;不用考虑文件后缀名">
<!ENTITY jasminum.zhnamesplit.label "抓取题录信息时拆分中文姓,名(包括知网页面抓取)">
<!ENTITY jasminum.rename.label "拉取知网元数据后调用Zotfile重命名附件(需安装Zotfile)">
<!ENTITY jasminum.autobookmark.label "从知网添加学位论文时自动添加书签">
Expand Down