ویکی‌پدیا:درخواست‌های ربات/ربات بایگانی به روش انتقال

برای اجرای این ربات باید این کد را در کنار ربات در فایل دیگری با نام calverter.py ذخیره نمائید تا ربات از آن کتابخانه برای تبدیل تاریخ شمسی و میلادی استفاده نماید.

این ربات هیچ آرگومانی ندارد و به صورت خودکار مواردی را که از {{بایگانی‌رضابات}} و {{بایگانی‌رضابات-کم‌حجم}} استفاده کرده‌اند در صورت نیاز بایگانی می‌کند.

کد

#!/usr/bin/python
# -*- coding: utf-8 -*-
# Reza(User:reza1615), 2011
# Distributed under the terms of the CC-BY-SA 3.0 .

import codecs
import locale
import os
import re
import string
import sys
import time
import traceback
import urllib
from datetime import timedelta, datetime

import calverter
import pagegenerators
import query
import wikipedia

# Bind new_hash to an MD5 constructor: hashlib.md5 when hashlib can be
# imported, otherwise the md5 module's constructor.
try:
    import hashlib
except ImportError:  # hashlib unavailable: fall back to the md5 module
    import md5
    new_hash = md5.md5
else:
    new_hash = hashlib.md5

# Module-level calendar converter instance.
cal = calverter.Calverter()

# Set the framework's configured put throttle to 0, then call setDelay() on
# the throttle object.
# NOTE(review): presumably setDelay() makes the throttle pick up the new
# value -- confirm against the pywikipedia version in use.
wikipedia.config.put_throttle = 0
wikipedia.put_throttle.setDelay()

# Wiki site object shared by every function below.
site = wikipedia.getSite('fa')


def faen(b):
    """Return *b* as text with Persian digits replaced by ASCII digits.

    *b* may be text or a value such as an int.  It is first passed through
    ``unic(str(unic(b)))`` so that non-text values become text; if that
    conversion raises, *b* is used unchanged.
    """
    try:
        b = unic(str(unic(b)))
    except Exception:
        # Best effort: keep the value as it was and still try the digit
        # replacement below.
        pass
    for persian, latin in zip(u'۰۱۲۳۴۵۶۷۸۹', u'0123456789'):
        b = b.replace(persian, latin)
    return unic(b)

def enfa(b):
    """Return *b* as text with ASCII digits replaced by Persian digits.

    The value is first run through ``unic(str(unic(b)))``; when that raises,
    the value is left as it was before the digits are replaced.
    """
    try:
        b = unic(str(unic(b)))
    except:
        pass
    # Replace 0..9 one after another, in ascending order.
    for latin, persian in zip(u'0123456789', u'۰۱۲۳۴۵۶۷۸۹'):
        b = b.replace(latin, persian)
    return unic(b)

pagesformove = []
subnum = 1

# -------------------------setting------------------------
# Archive-marker templates whose referring pages are processed.  The main
# loop pairs each template with the entry at the same position in
# pagesizelimits.
bottemplates = u'الگو:بایگانی‌رضابات-کم‌حجم', u'الگو:بایگانی‌رضابات'
# Size threshold per template; a page is archived only when it exceeds it.
pagesizelimits = 100000, 50000
# Number of days subtracted from the current time to get the cutoff
# (passed to timedelta in timenows).
datedelay = 3
# Passed to run() by the main loop.
# NOTE(review): the surrounding '{{' ... '}}' of this literal were probably
# lost when the page was rendered -- confirm against the original script.
toptemplate = u'ویکی‌پدیا:درخواست‌های ربات/ربات بایگانی به روش انتقال/بالای صفحه'
# --------------------------------------------------------

def unic(varb):
    """Return *varb* decoded from UTF-8, or *varb* itself if decoding fails.

    The decode is attempted with ``unicode(varb, 'UTF-8')``; any ordinary
    exception it raises means the value is handed back untouched.
    """
    try:
        return unicode(varb, 'UTF-8')
    except Exception:
        # Deliberately best effort, but no longer swallows
        # KeyboardInterrupt / SystemExit as the bare except did.
        return varb

def findarchive(case):
    """Work out the title of the next free archive subpage of *case*.

    Every page whose title starts with *case* is inspected; for titles
    containing u'/بایگانی' the text after that marker is read as a number
    (Persian digits allowed) and the highest number seen is kept.

    Returns a two-item list ``[case, archive_title]`` where archive_title is
    ``case + u'/بایگانی ' + <highest number + 1 in Persian digits>``.
    """
    marker = u'/بایگانی'
    subnum = 0
    namespace = wikipedia.Page(site, case).namespace()
    for pagesub in pagegenerators.PrefixingPageGenerator(case, namespace, False, None):
        # Use the page title directly (as the main loop does) instead of
        # stripping link brackets from str(page); those literals were garbled
        # in the published source.
        pagename = pagesub.title().replace(unic(case), u'').strip()
        if marker not in pagename:
            continue
        try:
            subnum = max(subnum, int(faen(pagename.replace(marker, u'').strip())))
        except ValueError:
            # Not a numbered archive subpage.
            continue
    archivelink = case + u'/بایگانی ' + enfa(subnum + 1)
    return [case, archivelink]

def int2month(num):
    """Return the full name of month *num* (1-12).

    *num* may be an int or a numeric string.  When ``locale.nl_langinfo`` is
    available the current locale's month name is returned; otherwise a
    lowercase English name from a built-in table is used.
    """
    num = int(num)
    if hasattr(locale, 'nl_langinfo'):
        name = locale.nl_langinfo(locale.MON_1 + num - 1)
        # Decode only when the platform handed back a byte string.
        if not isinstance(name, type(u'')):
            name = name.decode('utf-8')
        return name
    # The fifth entry used to be 'may_long', which is not a month name.
    months = ('january', 'february', 'march', 'april', 'may', 'june',
              'july', 'august', 'september', 'october', 'november', 'december')
    return months[num - 1]

def txt2timestamp(txt, format):
    """Parse the timestamp *txt* with the strptime pattern *format*.

    Returns the parsed time tuple.  If the first attempt raises ValueError,
    the UTF-8 encoded form of *txt* is tried once more; None is returned
    when that second attempt fails for any reason.
    """
    try:
        parsed = time.strptime(txt, format)
    except ValueError:
        parsed = None
        try:
            parsed = time.strptime(txt.encode('utf8'), format)
        except:
            pass
    return parsed

def persianmonth(a):
    """Replace Persian names of Gregorian months in *a* with English names.

    A name is replaced only when it stands as a whole word.  Plain substring
    replacement used to turn the Jalali month "مهر" into "May" plus a stray
    letter, because it starts with "مه" (May); such dates then failed to
    parse.  The character u"ٔ" is removed before and after the replacement,
    as in the original code.
    """
    month_names = (
        (u"ژانویه", u"January"),
        (u"فوریه", u"February"),
        (u"مارس", u"March"),
        (u"آوریل", u"April"),
        (u"مه", u"May"),
        (u"ژوئن", u"June"),
        (u"ژوئیه", u"July"),
        (u"اوت", u"August"),
        (u"سپتامبر", u"September"),
        (u"اکتبر", u"October"),
        (u"نوامبر", u"November"),
        (u"دسامبر", u"December"),
    )
    b = a.replace(u"ٔ", u"")
    for persian, english in month_names:
        # (?<!\w) / (?!\w): no letter or digit may touch the month name.
        whole_word = re.compile(u"(?<!\\w)" + persian + u"(?!\\w)", re.UNICODE)
        b = whole_word.sub(english, b)
    b = b.replace(u"ٔ", u"")
    return b

def moving(pageslist, movesection):
    """Move a page to its archive title and clean up the archived copy.

    pageslist   -- sequence ``[current_title, archive_title]``.
    movesection -- section texts that must NOT stay in the archive; each one
                   is removed from the archived text.

    The page is moved (leaving a redirect), then in the archived text the
    archive-marker template is replaced by a "discussion closed" notice, the
    shared top-of-page template is pointed at the page's own '/بالای صفحه'
    subpage, and the sections in *movesection* are removed.

    Returns the archive-marker template text that was found in the archived
    copy, or None when neither marker was present.
    """
    # NOTE(review): the published source shows "u,u" here; wiki rendering
    # swallowed the literals.  They are reconstructed as the wikitext form of
    # the two archive-marker templates named in the page intro and in the
    # module settings -- confirm against the original script.
    bottemplates = u'{{بایگانی‌رضابات-کم‌حجم}}', u'{{بایگانی‌رضابات}}'
    # NOTE(review): this looks like the rendered output of a template whose
    # name was lost (possibly {{بسته}}); the visible text is kept as the only
    # value the source shows -- confirm and restore the template call.
    closed_notice = u'\n\nبحث زیر پایان یافته‌است و به‌زودی بایگانی خواهد شد.'
    summary = u'بایگانی'
    pair = pageslist

    page = wikipedia.Page(wikipedia.getSite(), pair[0])
    page.move(pair[1], reason=summary, throttle=True, leaveRedirect=True, movetalkpage=False)

    page = wikipedia.Page(wikipedia.getSite(), pair[1])
    text2 = page.get()
    bottemplateout = None
    for bottemp in bottemplates:
        if bottemp in text2:
            text2 = text2.replace(bottemp, closed_notice).replace(u'\r', u'')
            bottemplateout = bottemp
    # NOTE(review): the braces around the search text are restored; without
    # them the replacement would nest '{{' inside an existing template call.
    text2 = text2.replace(
        u'{{ویکی‌پدیا:درخواست‌های ربات/ربات بایگانی به روش انتقال/بالای صفحه}}',
        u'{{' + pair[0] + u'/بالای صفحه}}')
    for removeSec in movesection:
        text2 = text2.replace(removeSec.replace(u'\r', u'').strip(), u'')
    # NOTE(review): a closing template probably stood between the two
    # newlines in the original and was lost in rendering -- confirm.
    page.put(text2 + u'\n\n', u'ربات:حذف الگوی بایگانی+انتقال بحث‌های درجریان')
    return bottemplateout

def findsection(text):
    """Split wikitext into its level-2 sections and the text before them.

    A section starts at a line of the form ``== Title ==`` and runs up to the
    next such line (or to the end of *text*).

    Returns ``(sections, toppage)``: the list of section texts in page order
    and the text that precedes the first heading.  When *text* has no
    level-2 heading the result is ``([], u'\\n')``; u'\\n' is the fallback the
    original code used for a missing header.
    """
    # Same pattern text as the former ur'' literal (it holds no backslashes).
    heading = re.compile(u'^== *([^=].*?) *== *$')

    # Record where each heading line starts.  Slicing by position keeps
    # repeated heading titles apart, which splitting on the heading text
    # could not do.
    starts = []
    offset = 0
    for line in text.split('\n'):
        if heading.search(line):
            starts.append(offset)
        offset += len(line) + 1  # +1 for the '\n' removed by split

    if not starts:
        # Previously this case raised IndexError.
        return [], u'\n'

    ends = starts[1:] + [len(text)]
    sections = [text[begin:end] for begin, end in zip(starts, ends)]
    toppage = text[:starts[0]]
    return sections, toppage

def timenows(datedelay):
    """Return the cutoff timestamp: the current UTC time minus *datedelay* days.

    The result is cut to whole minutes and converted with ``time.mktime``.
    tm_isdst is forced to 0 because that is what ``time.strptime`` yields for
    a "(UTC)" zone, so the cutoff can be compared with parsed signature
    times.

    The earlier version started from local time although it labelled the
    value "(UTC)", and went through the locale's month names on the way.
    """
    cutoff = datetime.utcnow() - timedelta(datedelay)
    cutoff = cutoff.replace(second=0, microsecond=0)
    fields = tuple(cutoff.timetuple())[:8] + (0,)
    return time.mktime(fields)

# Jalali month names in calendar order; position + 1 is the month number.
_JALALI_MONTHS = (
    u'فروردین', u'اردیبهشت', u'خرداد', u'تیر', u'مرداد', u'شهریور',
    u'مهر', u'آبان', u'آذر', u'دی', u'بهمن', u'اسفند',
)


def _jalali_sign_to_gregorian(sign):
    """Rewrite a "day month year ..." Jalali signature as a Gregorian one.

    *sign* is returned unchanged when it holds no Jalali month name or when
    its first three space-separated fields cannot be read as day, month and
    year numbers.
    """
    if not any(name in sign for name in _JALALI_MONTHS):
        return sign

    # Numeric copy of the signature: separators dropped, month names -> numbers.
    numeric = sign.replace(u'،', u'').replace(u',', u'').replace(u'-', u'')
    for number, name in enumerate(_JALALI_MONTHS):
        numeric = numeric.replace(name, u'%d' % (number + 1))
    fields = numeric.strip().split(u' ')
    try:
        dayf, monthf, yearf = int(fields[0]), int(fields[1]), int(fields[2])
    except (ValueError, IndexError):
        # Not in day/month/year order: leave it for the format list to reject.
        return sign
    if yearf < 1300:
        # Two-digit year.
        yearf += 1300

    cal = calverter.Calverter()
    yearen, monthen, dayen = cal.jd_to_gregorian(cal.jalali_to_jd(yearf, monthf, dayf))
    monthen = persianmonth(int2month(monthen))

    # Rebuild field by field so two-digit years and zero-padded days are
    # replaced reliably.
    parts = sign.split(u' ', 3)
    parts[0] = str(dayen)
    parts[1] = monthen
    parts[2] = parts[2].replace(fields[2], str(yearen))
    converted = u' '.join(parts).strip()

    zone = re.search(r'\(.*?\)', converted).group(0)
    return converted.replace(zone, u'(UTC)')


def feedLine(line, timestampnow, signfind):
    """Look for a signature timestamp in one line of a discussion.

    line         -- one line of wikitext.
    timestampnow -- value returned as the timestamp when nothing is found.
    signfind     -- flag returned unchanged when nothing is found.

    Persian digits and Persian names of Gregorian months are normalised
    first; a Jalali date is converted to Gregorian before parsing.

    Returns ``(timestamp, found)``: the ``time.mktime`` value of the parsed
    signature and True, or ``(timestampnow, signfind)`` when the line holds
    no signature or the signature cannot be parsed.
    """
    line = persianmonth(faen(line)).strip()

    # Same pattern text as the former ur'' literals, written with doubled
    # backslashes.  Two later patterns that could only match text the first
    # pattern already matches were dropped.
    signature_patterns = (
        u'(\\d{1,2}) (\\S+) (\\d\\d\\d\\d)، ساعت (\\d\\d{1,2}):(\\d\\d{1,2}) \\(.*?\\)',
        u'(\\d\\d):(\\d\\d), (\\S+) (\\d\\d?), (\\d\\d\\d\\d) \\(.*?\\)',
        u'(\\d{4})\\. (\\S+) (\\d\\d?)\\., (\\d\\d:\\d\\d) \\(.*?\\)',
        u'(\\d\\d?)\\. (\\S+) (\\d\\d\\d\\d) kl\\.\\W*(\\d\\d):(\\d\\d) \\(.*?\\)',
        u'(\\d\\d?)\\. (\\S+) (\\d\\d\\d\\d) kello \\W*(\\d\\d).(\\d\\d) \\(.*?\\)',
        u'(\\d\\d):(\\d\\d), (\\d\\d?)\\. (\\S+)\\.? (\\d\\d\\d\\d) \\(UTC\\)',
        u'(\\d{1,2}) (\\S+) (\\d\\d)، ساعت (\\d\\d{1,2}):(\\d\\d{1,2}) \\(.*?\\)',
    )
    TM = None
    for pattern in signature_patterns:
        TM = re.search(pattern, line)
        if TM:
            break
    if not TM:
        return timestampnow, signfind

    newsign = TM.group(0)
    TIME = txt2timestamp(newsign, "%d. %b %Y kl. %H:%M (%Z)")
    if not TIME:
        newsign = _jalali_sign_to_gregorian(newsign)
        # (text, format) pairs tried in order until one parses.
        attempts = (
            (newsign, "%Y. %B %d., %H:%M (%Z)"),
            (newsign, "%d. %b %Y kl.%H:%M (%Z)"),
            (newsign, "%H:%M, %d %B %Y (%Z)"),
            (newsign, "%H:%M, %d %b %Y (%Z)"),
            # Same format again, with the "(zone)" suffix removed.
            (re.sub(r' *\([^ ]+\) *', '', newsign), "%H:%M, %d %b %Y"),
            (newsign, "%H:%M, %b %d %Y (%Z)"),
            (newsign, "%H:%M, %B %d %Y (%Z)"),
            (newsign, "%H:%M, %b %d, %Y (%Z)"),
            (newsign, "%H:%M, %B %d, %Y (%Z)"),
            (newsign, "%d. %Bta %Y kello %H.%M (%Z)"),
            (newsign, "%H:%M, %d. %b. %Y (%Z)"),
            (newsign, u"%d %b %Y، ساعت %H:%M (%Z)"),
            (newsign, u"%d %B %Y، ساعت %H:%M (%Z)"),
        )
        for candidate, fmt in attempts:
            TIME = txt2timestamp(candidate, fmt)
            if TIME:
                break

    if TIME:
        return time.mktime(TIME), True
    wikipedia.output(u'Not found!')
    return timestampnow, signfind

def perun(pagesizelimit, firstpage):
    """Return True when the page *firstpage* is larger than *pagesizelimit*.

    The size is the length in bytes of the page text encoded as UTF-8.  The
    earlier ``sys.getsizeof`` call measured the interpreter's in-memory
    object instead, which differs between Python builds.
    """
    fapage = wikipedia.Page(site, firstpage)
    text = fapage.get()
    return pagesizelimit < len(text.encode('utf-8'))

def run(timestampnow, pagesizelimit, pagesformove, toptemplate):
    """Archive one page by moving it and re-creating it with recent threads.

    timestampnow  -- cutoff timestamp; sections with a signature newer than
                     this are copied back to the fresh page.
    pagesizelimit -- nothing is done unless the page text (UTF-8 bytes) is
                     larger than this.
    pagesformove  -- ``[page_title, archive_title]``.
    toptemplate   -- accepted for the caller's sake; not used here.

    The page is moved to the archive title by moving(), then a new page is
    saved under the old title containing the archive-marker template, the
    text that preceded the first section, and the recent sections.
    """
    firstpage = pagesformove[0]
    text = wikipedia.Page(site, firstpage).get()
    # Byte length of the wikitext, not sys.getsizeof.
    if pagesizelimit >= len(text.encode('utf-8')):
        return

    items, toppage = findsection(text)
    movesection = []
    for item in items:
        # Use the newest signature anywhere in the section.  Previously only
        # the last non-empty line counted, so a section ending in an unsigned
        # line was always treated as old.
        latest = None
        for line in item.split('\n'):
            if line == "":
                continue
            stamp, found = feedLine(line, timestampnow, False)
            if found and (latest is None or stamp > latest):
                latest = stamp
        if latest is not None and latest > timestampnow:
            movesection.append(item)

    bottemplates = moving(pagesformove, movesection)
    toppage = toppage.replace(bottemplates + u'\n', u'').strip()
    newtext = bottemplates + u'\n' + toppage + u'\n' + u''.join(movesection)
    wikipedia.Page(site, firstpage).put(newtext, u'ربات:کپی از بایگانی')

if __name__ == "__main__":
    # Cutoff shared by all pages in this run.
    timestampnow = timenows(datedelay)
    # Each archive-marker template is paired with the size limit at the same
    # position in pagesizelimits.
    for vttempl, pagesizelimit in zip(bottemplates, pagesizelimits):
        # Wikitext form of the template call, e.g. u'{{name}}'.
        # NOTE(review): the published source shows a no-op replace here; the
        # '{{' and '}}' literals were swallowed by wiki rendering and are
        # restored -- confirm against the original script.
        temple = vttempl.replace(u'الگو:', u'{{') + u'}}'
        for case in wikipedia.Page(site, vttempl).getReferences():
            talkpage = case.title()
            # Skip documentation pages and pages that are archives already.
            if u'توضیحات' in talkpage or u'بایگانی' in talkpage:
                continue
            try:
                textfa = case.get()
            except Exception:
                # Page text could not be fetched; go on to the next page.
                continue
            if temple not in textfa:
                wikipedia.output(talkpage)
                wikipedia.output(u"\03{lightred}page dosen't have the Archive template\03{default}")
                continue
            wikipedia.output(u'-----------------------')
            wikipedia.output(talkpage)
            if perun(pagesizelimit, talkpage):
                bothpages = findarchive(talkpage)
                run(timestampnow, pagesizelimit, bothpages, toptemplate)