Skip to content

Commit 2e63c41

Browse files
committed
improved interwiki links
1 parent 6ae2a87 commit 2e63c41

File tree

3 files changed

+39
-54
lines changed

3 files changed

+39
-54
lines changed

README.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,6 @@ The parser is not optimized for speed.
9090
# Todo
9191
The following future improvements will enhance the parser:
9292
- language localization
93-
- improve interwiki links
9493
- define missing namespaces in class NameSpaces
9594
- improve quality of HTML renderer
9695
- improve performance

wiki.html

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

wiki/tools/WikiFormatter.java

Lines changed: 38 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -461,65 +461,51 @@ private static void applyRules(StringBuilder sb, StringBuilder result, String li
461461
idx = idxk;
462462
}
463463
int idxb = sb.indexOf("|", idx + 2);
464+
String baseURL = null, display_text = null, internal_link;
464465
if ((idxb != -1) && (idxb < idx2)) {
465-
String keyword = sb.substring(idx + 2, idxb).trim(); // +2 == "[[".length()
466-
if (keyword.startsWith(category_label)) {//suppress category, example: [[Category:English abbreviations|CROSS]]
466+
internal_link = sb.substring(idx + 2, idxb).trim(); // +2 == "[[".length()
467+
display_text = sb.substring(idxb + 1, idx2);
468+
} else internal_link = sb.substring(idx + 2, idx2).trim();
469+
String path = internal_link;
470+
if (internal_link.startsWith(category_label)) {//suppress category, example: [[Category:English abbreviations|CROSS]]
471+
sb.delete(idx, idx2 + 2);
472+
len = sb.length();
473+
continue;
474+
}
475+
idxc = internal_link.indexOf(":", 1);
476+
if (idxc != -1) {
477+
String media = internal_link.substring(0, idxc).toLowerCase();
478+
if (media.startsWith(":"))
479+
media = media.substring(1);//remove initial :
480+
if (not_allowed_media.contains(media)) {
467481
sb.delete(idx, idx2 + 2);
468482
len = sb.length();
469483
continue;
470484
}
471-
idxc = keyword.indexOf(":", 1);
472-
String path = keyword;
473-
String baseURL = null;
474-
if (idxc != -1) {
475-
String media = keyword.substring(0, idxc).toLowerCase();
476-
if (media.startsWith(":"))
477-
media = media.substring(1);//remove initial :
478-
if (not_allowed_media.contains(media)) {
479-
sb.delete(idx, idx2 + 2);
480-
len = sb.length();
481-
continue;
482-
}
483-
if (code2language.containsKey(media)) {
484-
baseURL = String.format(linkBaseURL, media);
485-
path = keyword.substring(idxc + 1);
486-
} else if (media.equals("w")) {
487-
baseURL = "https://" + language + ".wikipedia.org/wiki/";
488-
path = keyword.substring(idxc + 1);
489-
}
490-
}
491-
if (baseURL == null)
492-
baseURL = String.format(linkBaseURL, language);
493-
result.append(sb, last, idx);
494-
result.append("<a href=\"").append(baseURL).append(path).append("\">").append(sb, idxb+1, idx2).append("</a>");
495-
last = idx2 + 2;
496-
ids = last;
497-
} else { // here we are not in the case: [[..[[...]]
498-
String arg1 = sb.substring(idx + 2, idx2);
499-
idxd = arg1.indexOf('#');
500-
if (idxd != -1)
501-
arg1 = arg1.substring(0, idxd);
502-
String url_text = arg1;
503-
idxc = arg1.indexOf(":");
504-
//TODO: specific handling for interwiki [[:w:....]]
505-
String scheme;
506-
if (idxc != -1) {
507-
scheme = arg1.substring(0, idxc + 1);
508-
url_text = arg1.substring(idxc + 1);
509-
} else scheme = String.format(linkBaseURL, language);
510-
511-
512-
result.append(sb, last, idx);
513-
result.append("<a href=\"").append(scheme);
514-
try {
515-
result.append(URLEncoder.encode(url_text, "UTF-8"));
516-
} catch (UnsupportedEncodingException e) {
517-
result.append(url_text); //fallback old way...
485+
if (code2language.containsKey(media)) {
486+
baseURL = String.format(linkBaseURL, media);
487+
path = internal_link.substring(idxc + 1);
488+
} else if (media.equals("w")) {
489+
baseURL = "https://" + language + ".wikipedia.org/wiki/";
490+
path = internal_link.substring(idxc + 1);
491+
/* } else {
492+
baseURL = internal_link.substring(0, idxc + 1);
493+
path = internal_link.substring(idxc + 1);*/
518494
}
519-
result.append("\">").append(arg1).append("</a>");
520-
last = idx2 + 2;
521-
ids = last;
522495
}
496+
if (baseURL == null)
497+
baseURL = String.format(linkBaseURL, language);
498+
result.append(sb, last, idx);
499+
result.append("<a href=\"").append(baseURL);
500+
try {
501+
result.append(URLEncoder.encode(path, "UTF-8"));
502+
} catch (UnsupportedEncodingException e) {
503+
result.append(path); //fallback old way...
504+
}
505+
result.append("\">").append(display_text == null ? internal_link : display_text).append("</a>");
506+
507+
last = idx2 + 2;
508+
ids = last;
523509
} else ids++;
524510
}
525511
}

0 commit comments

Comments
 (0)