ویکیپدیا:ویرایشگر خودکار/گروهبندی ارجاع
شروع به کار ویرایش
برای شروع، مقالات موجود در اینجا را به کمک ماژول گروه‌بندی ارجاع تمیز کنید.
- نکته: برای اجرای کد پایتون حتماً باید پایتون ۳ روی سیستمتان نصب باشد؛ این کد با پایتون ۲ کار نمی‌کند (البته با کمی تغییر در کد می‌توانید آن را با پایتون ۲ هم سازگار کنید).
کدها ویرایش
- ماژول
/// <summary>
/// Runs the external DuplicateReferences.py script over the article text.
/// The text is handed to the script through "input.txt" and the result is
/// read back from "output.txt" (both relative to the working directory).
/// </summary>
/// <param name="ArticleText">Current wikitext of the article.</param>
/// <param name="ArticleTitle">Article title (not used by this module).</param>
/// <param name="wikiNamespace">Namespace number (not used by this module).</param>
/// <param name="Summary">Receives the edit summary for this module.</param>
/// <param name="Skip">
/// Set to true when the script produced no output, failed, or left the
/// text unchanged.
/// </param>
/// <returns>The processed text, or the original text on any failure.</returns>
public string ProcessArticle(string ArticleText, string ArticleTitle, int wikiNamespace, out string Summary, out bool Skip)
{
    string OrigText = ArticleText;
    Skip = false;
    Summary = "+ ماژول گروهبندی ارجاع";
    try
    {
        // Remove any result left over from a previous article: if the
        // script fails this time, a stale output.txt must not be mistaken
        // for the result of the current article.
        if (System.IO.File.Exists("output.txt"))
            System.IO.File.Delete("output.txt");

        using (System.IO.StreamWriter writer = new System.IO.StreamWriter("input.txt"))
            writer.Write(ArticleText);

        System.Diagnostics.ProcessStartInfo psi = new System.Diagnostics.ProcessStartInfo();
        psi.WindowStyle = System.Diagnostics.ProcessWindowStyle.Hidden;
        psi.FileName = "python";
        psi.Arguments = "DuplicateReferences.py";

        int exitCode;
        // Dispose the process handle once the script has finished.
        using (System.Diagnostics.Process p = System.Diagnostics.Process.Start(psi))
        {
            p.WaitForExit();
            exitCode = p.ExitCode;
        }

        // A non-zero exit code means the script crashed; keep the article as is.
        if (exitCode != 0 || !System.IO.File.Exists("output.txt"))
        {
            Skip = true;
            return OrigText;
        }

        using (System.IO.StreamReader reader = System.IO.File.OpenText("output.txt"))
            ArticleText = reader.ReadToEnd();

        if (ArticleText == OrigText)
            Skip = true;
        return ArticleText;
    }
    catch (System.Exception)
    {
        // Deliberately best-effort: any I/O or process failure just skips
        // the article instead of interrupting the whole run.
        Skip = true;
        return OrigText;
    }
}
- DuplicateReferences.py
#!/usr/bin/python
# -*- coding: utf-8 -*-
# BY: رضا (User:reza1615 on fa.wikipedia)
# Distributed under the terms of the CC-BY-SA 3.0.
import re,codecs
class DuplicateReferences:
    """Merge duplicate ``<ref>`` footnotes in a piece of wikitext.

    References that share the same group and the same content are collapsed:
    the first occurrence becomes a named reference and every later
    occurrence is replaced by a self-closing ``<ref name=... />`` tag.
    """

    def __init__(self):
        # Match references (single-line content; self-closing tags are
        # excluded because "/" is not allowed inside the attributes).
        self.REFS = re.compile(
            r'(?i)<ref(?P<params>[^>/]*)>(?P<content>.*?)</ref>')
        # Attribute parsers.  The value is matched lazily and may not
        # contain a double quote, so that a following attribute
        # (e.g. name="a" group="n") is not swallowed into the value.
        # The lookahead makes an unquoted value end at whitespace.
        self.NAMES = re.compile(
            r'(?is).*?\bname\s*=\s*(?P<quote>"?)\s*(?P<name>[^"]+?)\s*'
            r'(?P=quote)(?=\s|$).*')
        self.GROUPS = re.compile(
            r'(?is).*?\bgroup\s*=\s*(?P<quote>"?)\s*(?P<group>[^"]+?)\s*'
            r'(?P=quote)(?=\s|$).*')
        # Prefix of the names generated for references that have none.
        self.autogen = 'ToolAutoGenRef'

    def process(self, text):
        """Return *text* with duplicate references merged.

        :param text: wikitext to clean up
        :return: the wikitext with duplicated references replaced by
            ``<ref name=... />`` tags; unchanged when there is no duplicate
        """
        # keys are ref group names ('' for references without a group)
        # values are a dict where :
        #   keys are ref content
        #   values are [name, [list of full ref matches],
        #               quoted, need_to_change]
        found_refs = {}
        found_ref_names = set()
        # Replace key by [value, quoted]
        named_repl = {}

        for match in self.REFS.finditer(text):
            content = match.group('content')
            if not content.strip():
                continue

            params = match.group('params')
            # Key on the group *name*: a match object is unique per call and
            # would put every grouped reference into its own bucket.
            group_match = self.GROUPS.match(params)
            group = group_match.group('group') if group_match else ''
            groupdict = found_refs.setdefault(group, {})

            if content in groupdict:
                v = groupdict[content]
                v[1].append(match.group())
            else:
                v = [None, [match.group()], False, False]

            name_match = self.NAMES.match(params)
            if name_match:
                quoted = name_match.group('quote') == '"'
                name = name_match.group('name')
                if v[0]:
                    if v[0] != name:
                        # Same content under a second name: later rewrite
                        # self-closing uses of that name to the first one.
                        named_repl[name] = [v[0], v[2]]
                elif name not in found_ref_names:
                    # First name associated with this content, and first
                    # time ever we meet this name.
                    v[2] = quoted
                    v[0] = name
                else:
                    # This name is already used with another content.
                    # We'll need to change it.
                    v[3] = True
                found_ref_names.add(name)
            groupdict[content] = v

        # First auto-generated name that does not clash with an existing one.
        auto_id = 1
        while self.autogen + str(auto_id) in found_ref_names:
            auto_id += 1

        for group_name, refs in found_refs.items():
            group_attr = 'group="%s" ' % group_name if group_name else ''

            for content, v in refs.items():
                if len(v[1]) == 1 and not v[3]:
                    continue
                name = v[0]
                if not name:
                    name = self.autogen + str(auto_id)
                    auto_id += 1
                elif v[2]:
                    name = '"%s"' % name
                named = '<ref %sname=%s>%s</ref>' % (group_attr, name, content)
                text = text.replace(v[1][0], named, 1)

                # make sure that the first (named ref) is not
                # removed later :
                pos = text.index(named) + len(named)
                header = text[:pos]
                end = text[pos:]

                unnamed = '<ref %sname=%s />' % (group_attr, name)
                for ref in v[1][1:]:
                    end = end.replace(ref, unnamed)
                text = header + end

        for old_name, (new_name, quoted) in named_repl.items():
            # TODO : Support ref groups
            if quoted:
                new_name = '"%s"' % new_name
            replacement = '<ref name=%s />' % new_name
            # Escape the old name so regex metacharacters in it are matched
            # literally, and use a callable so backslashes in the new name
            # are not interpreted as group references.
            text = re.sub(
                r'<ref name\s*=\s*(?P<quote>"?)\s*%s\s*(?P=quote)\s*/>'
                % re.escape(old_name),
                lambda _match, repl=replacement: repl,
                text)
        return text
# Script entry: read the article text from input.txt, merge its duplicate
# references and write the result to output.txt (both UTF-8 encoded).
# The input file is opened in a ``with`` block so the handle is closed
# as soon as the text has been read.
with codecs.open(u'input.txt', 'r', 'utf8') as source:
    text = source.read()
if u'{{پک' in text:
    # Text containing this template marker is written back unchanged.
    print ('article has template:pak')
    new_text = text
else:
    our_ref = DuplicateReferences()
    new_text = our_ref.process(text)
with codecs.open(u'output.txt', mode='w', encoding='utf8') as f:
    f.write(new_text)