ویکیپدیا:درخواستهای ربات/ربات جراح پلاستیک
این ربات مقالاتی را که از نظر محل قرارگیری عنوان‌ها مشکل دارند تصحیح می‌کند؛ همچنین مقالاتی را که منبع یا پانویس ندارند گزارش می‌دهد و خود، عنوان منابع را در پایین می‌افزاید.
کد ربات ویرایش
#!/usr/bin/python
# -*- coding: utf-8 -*-
# BY: Z (User:ZxxZxxZ on fa.wikipedia)
# BY: رضا (User:reza1615 on fa.wikipedia)
# Distributed under the terms of the CC-BY-SA 3.0 .
"""
This bot will make direct text replacements. It will retrieve information on
which pages might need changes either from an XML dump or a text file, or only
change a single page.
You can run the bot with the following commandline parameters:
-file - Work on all pages given in a local text file.
Will read any [[wiki link]] and use these articles.
Argument can also be given as "-file:filename".
-cat - Work on all pages which are in a specific category.
Argument can also be given as "-cat:categoryname".
-page - Only edit a specific page.
Argument can also be given as "-page:pagetitle". You can give this
parameter multiple times to edit multiple pages.
-ref - Work on all pages that link to a certain page.
Argument can also be given as "-ref:referredpagetitle".
-filelinks - Works on all pages that link to a certain image.
Argument can also be given as "-filelinks:ImageName".
-links - Work on all pages that are linked to from a certain page.
Argument can also be given as "-links:linkingpagetitle".
-start - Work on all pages in the wiki, starting at a given page. Choose
"-start:!" to start at the beginning.
NOTE: You are advised to use -xml instead of this option; this is
meant for cases where there is no recent XML dump.
-except:XYZ - Ignore pages which contain XYZ. If the -regex argument is given,
XYZ will be regarded as a regular expression.
-summary:XYZ - Set the summary message text for the edit to XYZ, bypassing the
predefined message texts with original and replacements inserted.
-template:XYZ-
-blog: - Check for blog sources. If one is found in the page, the page link will be sent to the defined address.
-source - Check the article's sources. If it has none, the page link will be sent to the defined address.
-namespace:n - Number of namespace to process. The parameter can be used
multiple times. It works in combination with all other
parameters, except for the -start parameter. If you e.g. want to
iterate over all user pages starting at User:M, use
-start:User:M.
-always - Don't prompt you for each replacement
other: -
NOTE: Only use either -xml or -file or -page, but don't mix them.
Examples:
behsaz.py -start:! برای تمام صفحات ویکی
or
behsaz.py -page:آرامگاه کورش
"""
# Standard library
import re, os, codecs, string
import sys
# pywikipedia framework
import wikipedia
import pagegenerators
import catlib
# Help-text placeholder: pywikipedia substitutes &params; with the shared
# page-generator parameter documentation when showing this script's help.
# BUGFIX: the key had been mangled by HTML-entity decoding ('&para' + 'ms;'
# rendered as a pilcrow); restored to the conventional '&params;'.
docuReplacements = {
    '&params;': pagegenerators.parameterHelp
}
# Module-level state shared by main()/run()/change().
page = False   # True => single-page mode (-page:), smaller preload batch
txtTmp = ' '
# Edit summary; %s is filled with the page title when saving.
msg = u' ربات: مرتب کننده مقالهها و یونیکد %s ([[وپ:درخواستهای ربات/ربات جراح پلاستیک|کد]])'
# NOTE: these two shadow the builtins dict/list; kept as-is because
# arrangepage() rebinds names of the same spelling.
dict = {}
list = []
manadd = ' '
# Ordered (key, pattern) pairs for the sections arrangepage() recognises.
# The first four (pan/jos/man/piv) are section headings; cat/itw are
# category and interwiki links.  Patterns are written as escaped u''
# literals instead of ur'' (value-identical) so Python 3 tooling can
# still parse the file.
regex = ((u'pan', u'\\=+\\s?پانویس\\s?\\=+'), (u'jos', u'\\=+\\s?جستارهای وابسته\\s?\\=+'),
         (u'man', u'\\=+\\s?منابع\\s?\\=+'), (u'piv', u'\\=+\\s?پیوند به بیرون\\s?\\=+'),
         (u'cat', u'\\[\\[([Cc]ategory|رده):.*?\\]\\]'),
         (u'itw', u'\\[\\[([a-z]{2,3}|[a-z]{2,3}\\-[a-z\\-]{2,}|simple):.*?\\]\\]'))
def unicodemaker(text):
    """Normalise *text* line by line.

    Each line is coerced to ``unicode`` (Python 2 codebase) and stripped of
    surrounding whitespace; the rejoined result is stripped again so the
    returned text has no leading/trailing blank space.
    """
    cleaned = [unicode(raw).strip() for raw in text.split('\n')]
    return '\n'.join(cleaned).strip()
def checkbug(text):
    """Blank out stray lines consisting solely of ``[`` or ``]``.

    Every line is whitespace-stripped; lines that are exactly a lone
    opening or closing bracket are replaced by an empty line.  The final
    result is stripped of leading/trailing blank space.
    """
    kept = []
    for raw in text.split('\n'):
        # A bare bracket on its own line is a leftover artefact -> drop it.
        line = ' ' if raw in (']', '[') else raw
        kept.append(line.strip())
    return '\n'.join(kept).strip()
def textcorections(txtTmp):
    """Canonicalise the four standard fa-wiki section headings.

    Folds Arabic letter variants (ي/ك) and wording variants of the
    footnotes / see-also / references / external-links headings into one
    canonical Persian form, e.g. ``=پانويس ها=`` becomes ``= پانویس =``.
    Patterns are escaped u'' literals, value-identical to the original
    ur'' forms.
    """
    heading_fixes = (
        (u'\\=\\s*پانو[یي]س[ ]?(ها)?\\s*\\=', u'= پانویس ='),
        (u'\\=\\s*جُ?ستار(ها[یي])? (وابسته|د[یي]گر|مرتبط|مشابه)\\s*\\=', u'= جستارهای وابسته ='),
        (u'\\=\\s*(منا?بع|منبع[ ]?ها)\\s*\\=', u'= منابع ='),
        (u'\\=\\s*(پ[یي]وند|ل[یي]ن[کك])[ ]?(ها[یي])? (به[ ]ب[یي]رون|ب[یي]رون[یي]|خارج[یي])\\s*\\=', u'= پیوند به بیرون ='),
    )
    for variant, canonical in heading_fixes:
        txtTmp = re.sub(variant, canonical, txtTmp)
    return txtTmp
def arrangepage(text, page, manadd):
    """Rearrange the bottom sections of an article into canonical order.

    Collects the footnotes (pan), see-also (jos), references (man),
    external-links (piv) sections plus stub tag, navboxes, trailing text,
    categories and interwikis, removes them from the body and re-appends
    them in the standard fa-wiki order.  Returns the rebuilt wikitext.

    Indentation reconstructed from syntax: the wiki copy of this source
    had lost all leading whitespace.
    """
    # Accumulators for each recognised section (single space == "empty").
    jos,pan,man,piv,cat,itw,khr=' ',' ',' ',' ',' ',' ',' '
    # Page title as plain text, without link brackets.
    page = str(page).replace('[[','').replace(']]','')
    def arrangeitw(intr):
        # Sort the interwiki link lines alphabetically.
        intr=intr.strip()
        interwiki=' '
        list2=[]
        for item in intr.split('\n'):
            list2.append(item)
        newlist=sorted(list2)
        for item2 in newlist:
            interwiki=interwiki+item2+'\n'
        return '\n'+interwiki.strip()
    def checkbottom(text):
        # Peel non-link lines off the bottom of the page (text that sits
        # below the interwiki block) and return them separately.
        text = text.strip()
        rege1 = re.compile(ur'\[\[([a-z]{2,3}|[a-z]{2,3}\-[a-z\-]{2,}|simple):.*?\]\]')
        textl = ' '
        if rege1.search(text):
            # Walk lines bottom-up until a link-looking line is reached.
            for line in range(len(text.split(u'\n')),0,-1):
                line = text.split(u'\n')[line-1]
                if line.find('[') != -1:
                    break
                if rege1.search(str(line)):
                    break
                else:
                    # Collect the stray line and drop its first occurrence.
                    textl = textl + line + u'\n'
                    text = text.replace(line,'',1)
            textl = textl.strip()
            if textl:
                return u'\n'+textl+u'\n', text
            else:
                return u'\n', text
        else:
            return u'\n', text
    def checknav(line):
        # Return the line if it looks like a navbox template, else False.
        # Any of these substrings disqualifies the line as a navbox.
        nonav = (u'خرد', u'پرچم', u'تمیزکاری', u'حذف', u'حق تکثیر', u'نیاز-', u'رویداد روز', u'ترجمه', u'ویکیسازی', u'stub', u'Cite', u'Citation', u'ترتیب', u'یادکرد', u']', u'[', u':', u'»', u'«', u'اصلی|', u'درباره', u'-', u'جعبه', u'{{چر}}', u'{{رچ}}', u'مختصات', u'جستارهای وابسته', u'{{پیدیاف', u'پیوند', u'نشان زبان', u'وبگاه رسمی', u'پانویس', u'نقل قول}}', u'{{عربی', u'دیگر', u'دیگر کاربردها', u'ابهامزدایی', u'{{•}}', u'{{سخ}}', u'شعر', u'{{پایان', u'{{آغاز', u'مدرک', u'منبع', u'{{بر}}', u'{{لوث}}', u'{{چپچین}}', u'{{راستچین}}', u'،', u'.', u'شابک', u'ویکی(انبار|گفتاورد|مدیا|پدیا|نبشته|خبر|نسک|دانشگاه|گونه)') # exclusion list
        for item in nonav:
            if line.find(item) != -1 :
                return False
        line = line.replace(u'\n','')
        return line
    def navboxfinder(text,page):
        # Extract navbox templates from the last section of the page.
        # Returns (navbox block, remaining text).
        navepart,textb=' ',' '
        count=-1
        navelist=' '
        if text.find('==') == -1:
            # No sections at all: nothing to extract.
            return '\n', text.strip()
        try:
            lastpart = text.split(u'==')[-1]
        except:
            return ' ', text
        lastpart = text.split(u'==')[-1]
        for line in lastpart.split('\n'):
            line = line.strip()
            if line == '':
                continue
            else:
                # Balanced {{...}} on one line => candidate template.
                linebaz = string.count(line,u"{{")
                linebasteh = string.count(line,u"}}")
                devid = linebaz - linebasteh
                if devid == 0 and linebaz > 0:
                    line=line.replace('{{','\n{{')
                    Template=checknav(line)
                    if Template!=False:
                        if not Template.split('{{')[0] or Template.split('}}')[1]:
                            if Template.find('{{')!=-1:
                                navelist=navelist+Template+'\n'
        lastpart=lastpart.replace('\r\n','\n')
        # Remove the collected navbox lines from the last section.
        for nave in navelist.split('\n'):
            nave=nave.strip()+'\n'
            if nave!='\n':
                lastpart=lastpart.replace(nave,'')
        # Move the navbox that mentions this page's title to the front.
        for nave in navelist.split('\n'):
            if nave.find(page)!=-1:
                navelist=navelist.replace(nave+'\n','')
                navelist= nave+'\n'+navelist
        # Rebuild the text with all sections except the (cleaned) last one.
        textsp=len(text.split(u'=='))
        counta=0
        for nave in text.split(u'=='):
            counta+=1
            if counta < textsp:
                textb=textb+nave+'=='
        text = str(textb) + str(lastpart)
        return '\n'+navelist.strip(),text.strip()
    novebox,text = navboxfinder(text,page)
    #--------------------------------------------------------------------------section arrangement --------------------------
    text=unicodemaker(text)
    # Pull out the stub ({{...خرد...}}) template, if any.
    regek = re.compile(ur'\{\{(.*?خرد|خرد.*?)\}\}')
    if regek.search(text):
        ma = regek.search(text)
        khr='\n'+text[ma.start():ma.end()]+'\n'
        # NOTE(review): khr includes added newlines, so this replace is a
        # no-op unless the template is newline-delimited in text — confirm.
        text=text.replace(khr,'')
    count=0
    dict={}
    list=[]
    dicts={}
    # Record the start offset of each recognised section/link block.
    for rege in regex:
        count+=1
        rege1 = re.compile(rege[1])
        if rege1.search(text):
            m = rege1.search(text)
            mored=text[m.start():m.end()]
            if count<5 :
                # pan/jos/man/piv must look like a heading.
                if mored.find('==')!=-1:
                    dict[rege[0]]=m.start()
            else:
                # NOTE: '[['and']]' short-circuits to ']]', so only the
                # closing brackets are actually checked here.
                if mored.find('[['and']]')!=-1:
                    dict[rege[0]]=m.start()
    text=text.strip()
    from operator import itemgetter
    # Sections sorted by their position in the page.
    dicts=sorted(dict.items(), key=itemgetter(1))
    bottom,text=checkbottom(text)
    # NOTE(review): checkbottom may shorten text, but the offsets in dicts
    # were computed on the earlier text — slices below assume the bottom
    # lines sat after every section; confirm on pages with stray text.
    text=text.strip()
    # Slice each section out of the body by its recorded offsets.
    for count in range(0,len(dicts)):
        secstart=int(dicts[count][1])
        if count==len(dicts)-1:
            secend=-1
            braket=text[-1]
        else:
            braket=''
            secend=int(dicts[count+1][1])
        if dicts[count][0]==u'pan':
            pan='\n'+(text[secstart:secend]+braket).strip()
        if dicts[count][0]==u'jos':
            jos='\n'+(text[secstart:secend]+braket).strip()
        if dicts[count][0]==u'man':
            man='\n'+(text[secstart:secend]+braket).strip()
        if dicts[count][0]==u'piv':
            piv='\n'+(text[secstart:secend]+braket).strip()
        if dicts[count][0]==u'cat':
            cat='\n'+(text[secstart:secend]+braket).strip()
        if dicts[count][0]==u'itw':
            itw='\n'+(text[secstart:secend]+braket).strip()
    # Canonical bottom-of-page order.
    partdown=pan+man+bottom+piv+novebox+khr+cat+arrangeitw(str(itw).strip())
    if partdown.find('{{پانویس}}')==-1:
        text=text.replace(u'\n== منابع ==\n', u'\n== منابع ==\n{{پانویس}}\n')
    # Skip adding a references section when one already exists.
    nomanadd=(u'{{پانویس}}', u'== منابع ==', u'== پانویس ==', u'<ref>', u'</ref>')
    for nom in nomanadd:
        if partdown.find(nom)!=-1:
            manadd='\n'
    # Truncate the body at the first recognised section (if any).
    try:
        text=text[:int(dicts[0][1])]
    except:
        text=text
    text=text+'\n'+(jos.strip()+manadd+partdown).strip()
    # Reset accumulators (module-style cleanup kept from the original).
    jos,pan,man,piv,cat,itw,khr,manadd,bottom,novebox=' ',' ',' ',' ',' ',' ',' ',' ',' ',' '
    dict={}
    list=[]
    dicts={}
    text=checkbug(text).strip()
    return text
##############################################################
def run(self):
    """Iterate the bot's page generator, fix each article and save it.

    ``self`` is a Boteditor instance (this is a module-level function
    invoked as ``run(bot)``, not a method).  Featured/good articles are
    skipped; redirects are resolved and their targets edited.
    """
    manadd=' '
    # NOTE(review): trovato_en, sen and interwiki_list are never used below.
    trovato_en = False
    sen = wikipedia.Site('fa')
    interwiki_list = []
    for page in self.generator:
        # Sentinel guard (page is normally a Page object, never True).
        if page==True:
            break
        try:
            if not page.canBeEdited():
                wikipedia.output(u'Skipping locked page %s' % page.title())
                continue
            text_fa = page.get()
        except wikipedia.NoPage:
            wikipedia.output(u'Page %s not found' % page.title())
            continue
        except wikipedia.IsRedirectPage:
            # Follow the redirect and edit its target instead.
            # (The double get() is redundant but kept from the original.)
            pageRedirect = page.getRedirectTarget()
            text_fa = pageRedirect.get()
            pageRedirect = page.getRedirectTarget()
            text_fa = pageRedirect.get()
            page=page.getRedirectTarget()
            wikipedia.output(u'Page %s was Redirect but edited!' % page)
        except:
            # Best-effort: any other fetch error skips the page.
            continue
        # Leave featured/good/nominated articles untouched.
        # (The last tuple entry is a duplicate, kept from the original.)
        maghalekhoob=(u'{{مقاله برگزیده}}', u'{{مقاله خوب}}', u'{{مقاله پیشنهادی}}', u'{{مقاله برگزیده پیشین}}', u'{{مقاله برگزیده پیشین}}')
        for magha in maghalekhoob:
            if text_fa.find(magha)!=-1 :
                maghalekhoobg=False
                break
            else:
                maghalekhoobg=True
        if maghalekhoobg==False :
            continue
        # Normalise headings, then decide whether a references section
        # (and {{پانویس}}) must be appended.
        text= textcorections(text_fa)
        if text.find('== منابع ==')==-1:
            if text.find('{{پانویس}}')==-1:
                manadd=u'\n== منابع ==\n{{پانویس}}\n'
            else:
                manadd=u'\n== منابع ==\n'
        text=arrangepage(text,page,manadd)
        # wikipedia.setAction(msg % page)
        change(text,page)
        if page==True:
            break
def change(text, page):
    """Apply final whitespace and ZWNJ cleanups to *text*, then save *page*.

    The regexes below were restored from the wiki copy of this script, in
    which the invisible ZWNJ (U+200C) characters had been stripped — that
    left patterns like ``u'+'`` which raise re.error at runtime.  Patterns
    are written as escaped u'' literals (value-identical to the original
    ur'' forms) with ZWNJ spelled explicitly as ``\\u200c``.
    """
    txtTmp = text.strip()
    # Blank-line normalisation around headings, categories and lists.
    txtTmp = re.sub(u'(?<=[^\\r\\n])(\\r\\n\\={1,5}.*?\\={1,5}\\r\\n)', u'\\n\\1', txtTmp)
    txtTmp = re.sub(u'(\\[\\[رده\\:.*?\\]\\])(\\n|\\r\\n)*(\\[\\[رده\\:.*?\\]\\])', u'\\1\\n\\3', txtTmp)
    txtTmp = re.sub(u'(\\r\\n\\={1,5}.*?\\={1,5}\\r\\n\\r\\n)\\r\\n', u'\\1', txtTmp)
    txtTmp = re.sub(u'(\\r\\n){3,}', u'\\n\\n', txtTmp)
    txtTmp = re.sub(u'\\]\\r\\n+\\*\\s*\\[', u']\\n* [', txtTmp)
    txtTmp = re.sub(u"\\r\\n+\\}\\}\\r\\n+'''", u"\\n}}\\n'''", txtTmp)
    # ZWNJ cleanup (restored): collapse runs, then delete ZWNJ that is
    # useless after/before characters that never join in Persian script.
    txtTmp = re.sub(u'\u200c+', u'\u200c', txtTmp)  # duplicate ZWNJs
    txtTmp = re.sub(u'(?<=[\\w\\s\\r\\n#{}\\(\\)<>«»؟!@$#%&-~÷×٪﷼٫,،٬;؛:\\?\\*\\+\\=\\.\\|\\/\\\\"۰۱۲۳۴۵۶۷۸۹اآأإژزرذدوؤةء])\u200c', u'', txtTmp)  # after
    txtTmp = re.sub(u'\u200c(?=[\\w\\s\\r\\n#{}\\(\\)<>«»؟!@$#%&-~÷×٪﷼٫,،٬;؛:\\?\\*\\+\\=\\.\\|\\/\\\\"۰۱۲۳۴۵۶۷۸۹ء])', u'', txtTmp)  # before
    # BUGFIX: was unicodemaker(text), which silently discarded every
    # correction applied to txtTmp above.
    new_text = unicodemaker(txtTmp)
    if new_text:
        # BUGFIX: the original used str.find() with a regex pattern, which
        # never matched, so redirect pages were not actually skipped.
        if not re.search(u'#(REDIRECT|تغییرمسیر|[Rr]edirect)', new_text):
            try:
                page.put(new_text, msg % page, watchArticle=None, minorEdit=True)
            except wikipedia.EditConflict:
                wikipedia.output(u'Skipping %s because of edit conflict' % (page.title()))
            except wikipedia.SpamfilterError as url:
                wikipedia.output(u'Cannot change %s because of blacklist entry %s' % (page.title(), url))
class Boteditor:
    """Thin container handing a page generator to :func:`run`.

    ``autoTitle`` and ``autoText`` are accepted for command-line
    compatibility but deliberately ignored — only the generator is kept.
    """

    def __init__(self, generator, autoTitle = False, autoText = False):
        # run() iterates self.generator; nothing else is stored.
        self.generator = generator
def main():
gen = None
page=False
# summary message
summary_commandline = None
# Don't edit pages which contain certain texts.
exceptions = []
# commandline paramater.
# Which namespaces should be processed?
# default to [] which means all namespaces will be processed
namespaces = []
template = None
PageTitles = []
autoText = False
autoTitle = False
bloga=False
sourcea=False
# This factory is responsible for processing command line arguments
# that are also used by other scripts and that determine on which pages
# to work on.
genFactory = pagegenerators.GeneratorFactory()
# Load default summary message.
# BUG WARNING: This is probably incompatible with the -lang parameter.
wikipedia.setAction(msg)
# Read commandline parameters.
#-------------------------------------------------------------------------------------------------
for arg in wikipedia.handleArgs():
if arg == '-autotitle':
autoTitle = True
elif arg == '-autotext':
autoText = True
elif arg.startswith('-page'):
if len(arg) == 5:
PageTitles.append(wikipedia.input(u'Which page do you want to chage?'))
else:
PageTitles.append(arg[6:])
elif arg.startswith('-except:'):
exceptions.append(arg[8:])
elif arg.startswith('-blog:'):
bloga=True
elif arg.startswith('-source:'):
sourcea=True
elif arg.startswith('-page:'):
page=True
elif arg.startswith('-template:'):
template = arg[10:]
elif arg.startswith('-namespace:'):
namespaces.append(int(arg[11:]))
elif arg.startswith('-summary:'):
wikipedia.setAction(arg[9:])
summary_commandline = True
else:
generator = genFactory.handleArg(arg)
if generator:
gen = generator
print namespaces
if PageTitles:
pages = [wikipedia.Page(wikipedia.getSite(),PageTitle) for PageTitle in PageTitles]
gen = iter(pages)
if not gen:
# syntax error, show help text from the top of this file
wikipedia.showHelp('behsaz')
wikipedia.stopme()
sys.exit()
if namespaces != []:
gen = pagegenerators.NamespaceFilterPageGenerator(gen,namespaces)
# gen = pagegenerators.RedirectFilterPageGenerator(gen)
if page==True:
preloadingGen = pagegenerators.PreloadingGenerator(gen,pageNumber = 1)
else:
preloadingGen = pagegenerators.PreloadingGenerator(gen,pageNumber = 60)
# -------------------------------------------------------------------------------------------------
bot = Boteditor(preloadingGen,autoTitle,autoText)
run(bot)
if __name__ == "__main__":
bloga=False
sourcea=False
main()