Skip to content

Commit 51f4429

Browse files
committed
Added support for language localization
Added support for language localization and other minor improvements in wiki code parsing
1 parent 2e63c41 commit 51f4429

File tree

15 files changed

+302
-120
lines changed

15 files changed

+302
-120
lines changed

README.md

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -83,16 +83,18 @@ Support libraries:
8383
- luabit\\\*.lua, lualib\\\*.lua and ustring\\\*.lua: lua scripts, reference: https://github.com/wikimedia/mediawiki-extensions-Scribunto/tree/master/includes/Engines/LuaCommon/lualib
8484

8585
# Caveats
86-
This wiki parser is simplified and light, but has several limitations. It is designed only for english wiktionary.
87-
It does not aim to achieve 100% features of php based wiki parser and it is not tested versus wikipedia.
86+
This wiki parser is simplified and light, but has several limitations.
87+
It does not aim to implement 100% of the features of the PHP-based wiki parser, and it is designed for Wiktionary.
8888
The parser is not optimized for speed.
8989

90-
# Todo
91-
The following future improvements will enhance the parser:
92-
- language localization
93-
- define missing namespaces in class NameSpaces
94-
- improve quality of html renderer
95-
- improve performance
90+
# Localization
91+
Language localization is supported by defining a properties file in the ``wiki`` folder with the filename wiktionary_\<language code\>.properties
92+
93+
The following optional properties may be defined in localization files:
94+
- ``thislanguage`` and ``language_pattern``: filter criteria to extract only the wanted language
95+
- ``module``: localized label for Module
96+
- ``template``: localized label for Template
97+
- ``redirect``: localized label for Redirect
9698

9799
# Credits
98100
This software integrates third-party (3pp) software to perform specific activities.

info/bliki/extensions/scribunto/engine/lua/ScribuntoLuaEngine.java

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,8 @@
5353
import wiki.parserfunctions.ParserFunctions;
5454
import wiki.MagicWords;
5555
import wiki.tools.WikiPage;
56+
import static wiki.NameSpaces.getNameSpaceNumber;
57+
import static wiki.NameSpaces.getNameSpaceByNumber;
5658

5759
public final class ScribuntoLuaEngine implements MwInterface {
5860
private static final int MAX_EXPENSIVE_CALLS = 10;
@@ -67,8 +69,6 @@ public final class ScribuntoLuaEngine implements MwInterface {
6769

6870
private final WikiPage wp;
6971

70-
private final static String module_label = "Module:";
71-
7272
private final boolean debug = false;
7373

7474
public ScribuntoLuaEngine(WikiPage wp) {
@@ -112,7 +112,7 @@ public String invoke(String moduleName, String functionName, Frame parent, Map<S
112112
throw new ScribuntoException(e);
113113
}
114114
}
115-
final Frame frame = new Frame(module_label + moduleName, params, parent, isSubst);
115+
final Frame frame = new Frame(getNameSpaceByNumber(828) + ":" + moduleName, params, parent, isSubst);
116116
final LuaValue function = loadFunction(functionName, prototype, frame);
117117

118118
return executeFunctionChunk(function, frame);
@@ -499,8 +499,17 @@ private String fileNameForInterface(MwInterface luaInterface) {
499499
}
500500

501501
private InputStream findPackage(String name) throws IOException {
502-
if (name.startsWith(module_label)) {//TODO: handle also alias and language localizations
503-
return findModule(name.substring(module_label.length()));
502+
boolean isModule = false;
503+
int idx = name.indexOf(":");
504+
if (idx != -1) {
505+
String ns = name.substring(0, idx);
506+
Integer ns_id = getNameSpaceNumber(ns);
507+
if (ns_id != null && ns_id == 828)
508+
isModule = true;
509+
}
510+
511+
if (isModule) {
512+
return findModule(name.substring(idx + 1));
504513
} else {
505514
InputStream is = globals.finder.findResource(name+".lua");
506515
if (is != null) {

wiki/MagicWords.java

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ public enum MagicWord {
7272
filepath,
7373
fullpagename,
7474
fullpagenamee,
75+
_int_,
7576
lc,
7677
lcfirst,
7778
localday,
@@ -152,6 +153,8 @@ public static MagicWord get(String name) {
152153
return MagicWord._p_i_p_e_;
153154
if (name.equals("="))
154155
return MagicWord._e_q_u_a_l_;
156+
if (name.equals("int"))
157+
return MagicWord._int_;
155158
try {
156159
return MagicWord.valueOf(name.toLowerCase());
157160
} catch (IllegalArgumentException ex) {
@@ -312,6 +315,8 @@ public static String evaluate(MagicWord magicWord, String parameter, String titl
312315
return "|";
313316
case _e_q_u_a_l_:
314317
return "=";
318+
case _int_:
319+
return parameter;//todo: implement logic for intFunction as in CoreParserFunctions.php
315320
default:
316321
break;
317322
}
@@ -344,13 +349,13 @@ private static String getRootPageName(String parameter, String title) {
344349
}
345350

346351
private static String getFullpagename(String parameter, String title) {
347-
if (parameter != null && parameter.length() > 0)
352+
if (parameter != null && !parameter.isEmpty())
348353
return parameter;
349354
else return title;
350355
}
351356

352357
private static String getPagenameHelper(String parameter, String title) {
353-
if (parameter != null && parameter.length() > 0)
358+
if (parameter != null && !parameter.isEmpty())
354359
title = parameter;
355360
int idx = title.indexOf(':');
356361
if (idx != -1) {

wiki/NameSpaces.java

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,13 @@ public Integer get_associated() {
129129
return associated;
130130
}
131131

132+
public void add_alias(String alias) {
133+
if (aliases == null)
134+
throw new RuntimeException("unsupported operation for namespace " + canonicalName);
135+
if (!aliases.contains(alias))
136+
aliases.add(alias);
137+
}
138+
132139
}
133140

134141
private final static HashMap<Integer, NameSpace> namespaces = new HashMap<>();
@@ -139,30 +146,34 @@ public Integer get_associated() {
139146
namespaces.put(1, new NameSpace(1, "Talk", "Talk", true, false, true, false, true, false, false, true, null, 0, 1, 0));
140147
namespaces.put(2, new NameSpace(2, "User", "User", true, true, true, false, true, false, true, false, null, 2, 3, 3));
141148
namespaces.put(3, new NameSpace(3, "User talk", "User talk", true, true, true, false, true, false, false, true, null, 2, 3, 2));
142-
namespaces.put(4, new NameSpace(4, "Project", "Project", true, false, true, false, true, false, true, false, Arrays.asList("Meta", "WP", "Wiktionary"), 4, 5, 5));
143-
namespaces.put(5, new NameSpace(5, "Project talk", "Project talk", true, false, true, false, true, false, false, true, Arrays.asList("Meta_talk", "WT", "Wiktionary_talk"), 4, 5, 4));
144-
namespaces.put(6, new NameSpace(6, "File", "File", false, false, true, false, true, false, true, false, Collections.singletonList("Image"), 6, 7, 7));
145-
namespaces.put(7, new NameSpace(7, "File talk", "File talk", true, false, true, false, true, false, false, true, Collections.singletonList("Image_talk"), 6, 7, 6));
149+
namespaces.put(4, new NameSpace(4, "Project", "Project", true, false, true, false, true, false, true, false, new java.util.ArrayList<>(Arrays.asList("WT", "Wiktionary")), 4, 5, 5));
150+
namespaces.put(5, new NameSpace(5, "Project talk", "Project talk", true, false, true, false, true, false, false, true, new java.util.ArrayList<>(Arrays.asList("Wiktionary_talk")), 4, 5, 4));
151+
namespaces.put(6, new NameSpace(6, "File", "File", false, false, true, false, true, false, true, false, new java.util.ArrayList<>(Arrays.asList("Image")), 6, 7, 7));
152+
namespaces.put(7, new NameSpace(7, "File talk", "File talk", true, false, true, false, true, false, false, true, new java.util.ArrayList<>(Arrays.asList("Image_talk")), 6, 7, 6));
146153
namespaces.put(8, new NameSpace(8, "MediaWiki", "MediaWiki", true, false, true, false, true, false, true, false, null, 8, 9, 9));
147154
namespaces.put(9, new NameSpace(9, "MediaWiki talk", "MediaWiki talk", true, false, true, false, true, false, false, true, null, 8, 9, 8));
148-
namespaces.put(10, new NameSpace(10, "Template", "Template", false, false, true, false, true, false, true, false, Collections.singletonList("T"), 10, 11, 11));
155+
namespaces.put(10, new NameSpace(10, "Template", "Template", false, false, true, false, true, false, true, false, new java.util.ArrayList<>(Arrays.asList("T")), 10, 11, 11));
149156
namespaces.put(11, new NameSpace(11, "Template talk", "Template talk", true, false, true, false, true, false, false, true, null, 10, 11, 10));
150157
namespaces.put(12, new NameSpace(12, "Help", "Help", true, false, true, false, true, false, true, false, null, 12, 13, 13));
151158
namespaces.put(13, new NameSpace(13, "Help talk", "Help talk", true, false, true, false, true, false, false, true, null, 12, 13, 12));
152-
namespaces.put(14, new NameSpace(14, "Category", "Category", false, false, true, false, true, false, true, false, Collections.singletonList("CAT"), 14, 15, 15));
159+
namespaces.put(14, new NameSpace(14, "Category", "Category", false, false, true, false, true, false, true, false, new java.util.ArrayList<>(Arrays.asList("CAT")), 14, 15, 15));
153160
namespaces.put(15, new NameSpace(15, "Category talk", "Category talk", true, false, true, false, true, false, false, true, null, 14, 15, 14));
154161

155-
namespaces.put(100, new NameSpace(100, "Portal", "Portal", false, false, true, false, true, false, true, false, Arrays.asList("AP", "Appendix"), 100, 101, 101));
156-
namespaces.put(101, new NameSpace(101, "Portal talk", "Portal talk", false, false, true, false, true, false, true, false, Collections.singletonList("Appendix_talk"), 100, 101, 100));
162+
namespaces.put(100, new NameSpace(100, "Portal", "Portal", false, false, true, false, true, false, true, false, new java.util.ArrayList<>(Arrays.asList("AP", "Appendix")), 100, 101, 101));
163+
namespaces.put(101, new NameSpace(101, "Portal talk", "Portal talk", false, false, true, false, true, false, true, false, new java.util.ArrayList<>(Arrays.asList("Appendix_talk")), 100, 101, 100));
157164

158-
namespaces.put(828, new NameSpace(10, "Module", "Module", false, false, true, false, true, false, true, false, Collections.singletonList("MOD"), 828, 829, 829));
165+
namespaces.put(828, new NameSpace(828, "Module", "Module", false, false, true, false, true, false, true, false, new java.util.ArrayList<>(Arrays.asList("MOD")), 828, 829, 829));
159166
namespaces.put(829, new NameSpace(829, "Module talk", "Module talk", false, false, true, false, true, false, false, true, null, 828, 829, 828));
160167
}
161168

162169
public static HashMap<Integer, NameSpace> getNameSpaces() {
163170
return namespaces;
164171
}
165172

173+
public static NameSpace getNameSpace(int numnamespace) {
174+
return namespaces.get(numnamespace);
175+
}
176+
166177
public static boolean isNameSpace(String namespace) {
167178
return getNameSpaceNumber(namespace) != null;
168179
}

wiki/TemplateParser.java

Lines changed: 49 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -32,18 +32,18 @@
3232
import wiki.MagicWords;
3333
import wiki.tools.WikiScanner;
3434
import wiki.tools.WikiPage;
35+
import static wiki.NameSpaces.getNameSpaceNumber;
36+
import static wiki.NameSpaces.getNameSpaceByNumber;
3537
import static wiki.tools.Utilities.deleteAll;
3638
import static wiki.tools.Utilities.flipTemplateName;
3739
import static wiki.tools.Utilities.process_include;
3840
import info.bliki.extensions.scribunto.template.Frame;
41+
3942
/*
4043
The class TemplateParser implements light wiki template parser.
4144
*/
4245
final public class TemplateParser {
4346

44-
private final static String template_label = "Template:";
45-
private final static String lc_template_label = template_label.toLowerCase();
46-
4747
//main method parse string, returns evaluated string
4848
public String parse(String string, WikiPage wp) {//external
4949
StringBuilder sb = new StringBuilder();
@@ -71,18 +71,21 @@ private void template_body(WikiScanner sh, StringBuilder sb, WikiPage wp, Frame
7171
while (sh.getSequence("{{")) {
7272
int pointer = sh.getPointer(); //save pointer to be ready to retract in case of missing }}
7373

74-
if (sh.getChar('{')) {
75-
if (parameter_holder(sh, sb, wp, parent)) {
76-
String str = sh.getStringWithoutOpening();//twin
77-
if (str != null)
78-
sb.append(str);
79-
continue;
80-
}
74+
if (sh.getSequence("{{")) {//handling cases like {{{{LC:Blabla}} ... }} and {{{{{1}}} ... }}
75+
sh.setPointer(pointer);//retract scanner
76+
} else if (sh.getChar('{')) {
77+
if (parameter_holder(sh, sb, wp, parent)) {
78+
String str = sh.getStringWithoutOpening();//twin
79+
if (str != null)
80+
sb.append(str);
81+
continue;
82+
}
8183
//here we have a pending literal {
82-
sh.setPointer(pointer - 1);//partially retract scanner
83-
sb.append("{");//save orphan { as literal
84-
continue;
84+
sh.setPointer(pointer - 1);//partially retract scanner
85+
sb.append("{");//save orphan { as literal
86+
continue;
8587
}
88+
8689
String p = invocation_body(sh, wp, parent);
8790
if (p != null) {
8891
sb.append(p);
@@ -129,7 +132,7 @@ private String invocation_body(WikiScanner sh, WikiPage wp, Frame parent) {
129132
//magic_word_call ::= magic_word [ ":" magic_parameter]
130133
//parser_function_call ::= parser_function_name ":" parser_function_parameter { "|" [parser_function_parameter] }*
131134
//template_call ::= template_identifier { "|" [template_parameter] }*
132-
int pointer0 = sh.getPointer(); //save pointer to be ready to retract
135+
// int pointer0 = sh.getPointer(); //zzrifletti save pointer to be ready to retract
133136

134137
String identifier = sh.getStringParameter(null);
135138
if (identifier == null)
@@ -140,25 +143,30 @@ private String invocation_body(WikiScanner sh, WikiPage wp, Frame parent) {
140143
}
141144
if (identifier.startsWith("subst:")) {//ignore subst:
142145
identifier = identifier.substring("subst:".length());
143-
pointer0 += "subst:".length();
146+
// pointer0 += "subst:".length();zzrifletti
144147
}
145148
if (identifier.startsWith("safesubst:")) {//ignore safesubst:
146149
identifier = identifier.substring("safesubst:".length());
147-
pointer0 += "safesubst:".length();
150+
// pointer0 += "safesubst:".length();zzrifletti
148151
}
149-
identifier = parseParameter(identifier, wp, parent);
152+
//System.out.println(">0>>"+identifier);//zzrifletti
153+
identifier = parseParameter(identifier, wp, parent).trim();
150154
int pointer = sh.getPointer(); //save pointer to be ready to retract in case of invalid magic word or parser function
151155
//check & process magic word
152156
int idx = identifier.indexOf(":");
153157
String name = idx != -1 ? identifier.substring(0, idx) : identifier;
154158
MagicWords.MagicWord mw = MagicWords.get(name);
155159
if (mw != null) {
160+
//System.out.println(">>>"+identifier);//zzrifletti
161+
//System.out.println("mw>>>"+name);//zzrifletti
156162
String parameter = null;
157163
if (idx != -1) {//parameter present
158-
sh.setPointer(pointer0);//retract scanner at start of identifier
159-
sh.moveAfter(":");//move after : to get parameter
164+
// sh.setPointer(pointer0);//zzrifletti retract scanner at start of identifier
165+
// sh.moveAfter(":");//zzrifletti move after : to get parameter
160166

161-
String param = sh.getStringParameter(null);
167+
// String param = sh.getStringParameter(null);zzrifletti
168+
String param = identifier.substring(idx + 1);
169+
//System.out.println("param>>>"+param);//zzrifletti
162170
parameter = param == null ? "" : parseParameter(param, wp, parent).trim();
163171

164172
while (sh.getChar('|')) {//ignore any further parameter(s)
@@ -167,6 +175,8 @@ private String invocation_body(WikiScanner sh, WikiPage wp, Frame parent) {
167175
}
168176
if (sh.getSequence("}}")) {
169177
String result = MagicWords.evaluate(mw, parameter, wp.getPagename(), wp.getRevision());
178+
//System.out.println("parameter>>>"+parameter);//zzrifletti
179+
//System.out.println("result>>>"+result);//zzrifletti
170180
if (result != null)
171181
return result;
172182
sh.setPointer(pointer);//retract scanner
@@ -176,11 +186,12 @@ private String invocation_body(WikiScanner sh, WikiPage wp, Frame parent) {
176186
ParserFunction pf = ParserFunctions.get(name);
177187
if (pf != null) {
178188
if (idx != -1) {//first parameter present
179-
sh.setPointer(pointer0);//retract scanner at start of identifier
180-
sh.moveAfter(":");//move after : to get parameter
189+
// sh.setPointer(pointer0);//zzrifletti retract scanner at start of identifier
190+
// sh.moveAfter(":");//zzrifletti move after : to get parameter
181191

182192
ArrayList<String> parameters = new ArrayList<>();
183-
String param = sh.getStringParameter(null);
193+
// String param = sh.getStringParameter(null);zzrifletti
194+
String param = identifier.substring(idx + 1);
184195
parameters.add(param == null ? "" : param.trim());
185196

186197
while (sh.getChar('|')) {//twin
@@ -196,8 +207,17 @@ private String invocation_body(WikiScanner sh, WikiPage wp, Frame parent) {
196207
}
197208
if (!identifier.contains("#")) {
198209
//check & process template call
199-
if (identifier.toLowerCase().startsWith(lc_template_label)) {//TODO: handle also alias and language localizations
200-
identifier = identifier.substring(template_label.length());//remove template namespace
210+
boolean isTemplate = false;
211+
int idx1 = identifier.indexOf(":");
212+
if (idx1 != -1) {
213+
String ns = identifier.substring(0, idx1);
214+
Integer ns_id = getNameSpaceNumber(ns);
215+
if (ns_id != null && ns_id == 10)
216+
isTemplate = true;
217+
}
218+
219+
if (isTemplate) {
220+
identifier = identifier.substring(idx1 + 1);//remove template namespace
201221
}
202222
int pos = 1;
203223
Map<String, String> parameterMap = new LinkedHashMap<>();
@@ -232,25 +252,24 @@ private String invocation_body(WikiScanner sh, WikiPage wp, Frame parent) {
232252

233253
public String getParsedTemplate(String identifier, WikiPage wp, Map<String, String> parameterMap, Frame parent) {
234254
boolean trace_calls = wp.getTrace_calls();
235-
236255
while (true) {
237256
String parsed_template = null;
238257
String template_text = wp.getTemplate(identifier);
239258
if (template_text != null) {
240259
if (!detect_loop(identifier, parent)) {
241260
template_text = process_include(delete_comments(template_text), true).replace("{{{|safesubst:}}}", "");//twin in TestSuite
242-
String redirect = getRedirect(template_text);
261+
String redirect = wp.getRedirect(template_text);
243262
if (redirect != null) {
244263
identifier = redirect;
245264
continue;
246265
}
247266

248267
if (trace_calls) {
249-
System.out.print(template_label + identifier + "(");
268+
System.out.print(getNameSpaceByNumber(10) + ":" + identifier + "(");
250269
parameterMap.forEach((name, value) -> System.out.print(name + (value.isEmpty() ? "" : " = " + value) + ", "));
251270
System.out.println(")");
252271
}
253-
Frame frame = new Frame(template_label + identifier, parameterMap, parent, false);//frame of this template
272+
Frame frame = new Frame(getNameSpaceByNumber(10) + ":" + identifier, parameterMap, parent, false);//frame of this template
254273
StringBuilder sb = new StringBuilder();
255274
WikiScanner sh = new WikiScanner(delete_comments(template_text));
256275
template_body(sh, sb, wp, frame);
@@ -262,7 +281,7 @@ public String getParsedTemplate(String identifier, WikiPage wp, Map<String, Stri
262281
} else {
263282
if (trace_calls)
264283
System.out.println("Warning: template not found:" + identifier);
265-
return "[["+ template_label + identifier + "]]";
284+
return "[["+ getNameSpaceByNumber(10) + ":" + identifier + "]]";
266285
}
267286
}
268287
}
@@ -279,21 +298,6 @@ private boolean detect_loop(String identifier, Frame parent) {
279298
return false;
280299
}
281300

282-
public String getRedirect(String template_text) {
283-
int ibrac;
284-
if (template_text.length() > 9 && template_text.charAt(0) == '#' && ((ibrac = template_text.indexOf("[[")) != -1)) {
285-
String checkRedirect = template_text.substring(0, ibrac).toLowerCase();
286-
if (checkRedirect.startsWith("#redirect")) {
287-
int icolon = template_text.indexOf(":", ibrac);
288-
int ebrac = template_text.indexOf("]]", ibrac);
289-
if ((ebrac != -1) && (icolon != -1) && (icolon < ebrac)) {
290-
return template_text.substring(icolon + 1, ebrac).trim();
291-
}
292-
}
293-
}
294-
return null;//no redirect
295-
}
296-
297301
private static String delete_comments(String str) { // delete html comments <!-- -->
298302
StringBuilder text = new StringBuilder(str);
299303
int comment;

0 commit comments

Comments
 (0)