User:SgtLion/ListofIDcandidates


This project has been superseded by my bot-powers. Though it would still work in theory, I'd regard it as obsolete.

See User:SgtLion/MasterConsole for a DPL version that actually only took 10 minutes to work out. Ah well.

Complete list of dandwiki pages that do not have a correct ID (the parenthesised identifier suffixed to a title, e.g. "Fireball (3.5e Spell)"), and do not have a category, consistent content, or precedent that exempts them from requiring an ID.

Anyhow, here: There's nothing here!


I've released the Python 3.3 code under GNU, as all contributions to Dandwiki are, 'cause I've no reason to keep it private. So feel free to use it as such~
The following script uses wget.exe recursively to download all dandwiki articles:

#Recursively downloads every file of a site that is linked, directly or indirectly, from the start page
import os
from re import compile
doneurls = ['http://www.dandwiki.com/wiki/Main_Page','http://www.dandwiki.com/wiki/Con'] #List of URLs already wget-ed
goneurls = [] #URLs whose target files could not be found
count = 0 #Count for progress indication

def finddir(): #Returns the directory of this .py file, up to and including the final backslash
	path = os.path.realpath(__file__)
	return path[:path.rfind('\\') + 1]
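#For example, a script at C:\market\www.dandwiki.com\crawl.py (hypothetical filename) would give 'C:\market\www.dandwiki.com\'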

def urlfile(url): #Reduces a URL to its target filename (everything after the last '/')
	return url.rsplit('/', 1)[-1]
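#For example, urlfile('http://www.dandwiki.com/wiki/Con') returns 'Con'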

def down(url): #wgets url and adds it to the doneurls list
	global doneurls
	print('C:/market/www.dandwiki.com/wget.exe ' + url + ' -q -nc')
	os.system('C:/market/www.dandwiki.com/wget.exe ' + url + ' -q -nc') #-q quiet, -nc no-clobber: skip files that already exist
	doneurls = doneurls + [url]
	
def Getgoneurls(): #Returns the contents of finddir()+'goneurls.txt' as a list, one line per element
	with open(finddir()+'goneurls.txt', 'r') as goneurls:
		return [line.replace('\n', '') for line in goneurls]

def readurls(filename): #Downloads every URL found in the given file, then recurses into each newly downloaded page
	global doneurls
	global count
	count = count + 1
	if count % 100 == 0:
		print(count)
		print(' ')
	filename = filename.replace('?','@') #wget on Windows saves '?' in filenames as '@'
	if not os.path.exists(finddir()+filename):
		if os.path.exists(finddir()+filename+'.php'):
			filename = filename + '.php'
		else:
			print(filename + ' cannot be found')
			with open(finddir()+'goneurls.txt', 'a') as goneurls:
				goneurls.write(filename+'\n')
	if os.path.exists(finddir()+filename):
		file = open(finddir()+filename, 'r')
		patt = compile(r'(?:/wiki/|w/|p/)[a-zA-Z0-9._ :%()?=&;+*]+(?=")') #Matches the /wiki/, w/ and p/ link targets quoted in the page's HTML
		try:
			URLS = patt.findall(file.read())
		except UnicodeDecodeError:
			return None
		finally:
			file.close()
		#print(URLS)
		for url in URLS:
			if urlfile(url).find("index") == -1:
				url = url.replace(' ','_').replace("//","/")
				if url.find("http://www.dandwiki.com") == -1:
					if url[0] != '/':
						url = '/' + url
					url = "http://www.dandwiki.com" + url
				#print('\n' + url + '\n\n')
				#print(doneurls)
				#print('\n')
				if (url not in doneurls) and (url not in Getgoneurls()):
					down(url)
					readurls(urlfile(url))
if not os.path.exists(finddir()+'goneurls.txt'): #Make sure goneurls.txt exists before Getgoneurls() first reads it
	file = open(finddir()+'goneurls.txt', 'w')
	file.close()
readurls('Main_Page')
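
If you haven't got wget.exe, the same fetch-and-cache step can be done with the standard library instead. A minimal sketch, assuming Python 3.3's urllib.request and the same one-file-per-URL layout (save_page is a hypothetical stand-in for down(), not part of the original script):

import os
import urllib.request

def save_page(url, outdir): #Hypothetical stand-in for down(): fetch url and cache it as a local file
	filename = os.path.join(outdir, url.rsplit('/', 1)[-1].replace('?', '@'))
	if os.path.exists(filename): #Mimic wget's -nc: never overwrite an existing file
		return
	with urllib.request.urlopen(url) as response:
		with open(filename, 'wb') as out:
			out.write(response.read())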


The following script produces a list.txt of articles that need an ID suffixed, and that have no categories or other consistent content to exempt them from requiring one:

import os

def finddir(): #Returns the directory of this .py file, up to and including the final backslash
	path = os.path.realpath(__file__)
	return path[:path.rfind('\\') + 1]

def HasAcceptableID(entry): #Despite the name, returns True if entry does NOT end in an acceptable ID suffix, i.e. the page is a candidate for the list
	AcceptableIDs = ['_Leech)', '(Grim_Alteration)', '_Other)', '_(terminology)', '_(Campaign_Seed)', '_(anthology)', '_Incantation)', '_Flaw)', '_Optimized_Character_Build)', '_Cleric_Domain)', '_(novel)', '_Index)', '_Story)', '_Elite_Character)', 'Tradition', '_Creature_Ability)', '_Creature_Overview)', '_Skill_Use)', '(4e_Hazard)', '_Spell_Mechanic)', '_Creature_Type)', '(4e_Future_Prototype)', '_Quest)', '_Sphere)', '_Template)', '_Epic_Spell_Seed)', '_(4e_Epic_Destiny)', '_Campaign_Setting)', '_Disease)', '_Pantheon)', '_Trap)', '_Substitution_Level)', '_Substitution_Levels)', '_Trait)', '_Vestige)', '_Skill_Challenge)', '_Power_List)', '_Paragon_Path)', '_Creature_Subtype)', '_Sourcebook)', '_Demonic_Pacts)', '_Environment)', '_Deity)', '_Class)', '_Equipment)', '_Variant_Rule)', '_Variant)', '_Fighting_Style)', '_Alternate_Class_Feature)', '_Skill)', '3', '4e_', '_Spell)', '_Race)', '_Supplement)', '_NPC)', '_Feat)', '_Power)', '_Creature)', '_Setting)', '_Ritual)', '_Material)', '_Bloodline)', '_Spell_List)', '_Bodily_Relic)', '_Boost)', '_Invocation)', '_Maneuver)']
	for suffix in AcceptableIDs:
		if entry.endswith(suffix):
			return False
	return True
	
wikidir = finddir()# + 'wiki\\' #Finds folder for dndwiki files
from os import listdir
from os.path import isfile, join
onlyfiles = [ f for f in listdir(wikidir) if isfile(join(wikidir,f)) ] #Creates list of files in wikidir
out = open(finddir() + "list.txt",'w')
for entry in onlyfiles: #If the entry isn't exempt from needing an ID and doesn't have one, write it to list.txt
	l = len(entry)
	if l > 10:
		entry = entry.replace(' ','_')
		#print(entry)
		excluded = ['D&D_Wiki%3', 'UA_Talk%3', 'Add_New_', 'Template%3', 'User_', 'User%3', 'File%3', 'UA%3', 'DnD', 'Category%3', 'SRD3e%3', 'Special%3', 'MSRD%3', 'Talk%3', 'SRD%3', '4e_', 'SRD_Talk%3'] #Prefixes/namespaces whose pages never need an ID
		if entry[1] != '.' and HasAcceptableID(entry) and not any(entry.startswith(prefix) for prefix in excluded):
			entryfile = open(finddir()+entry, 'r', errors='ignore')
			contents = entryfile.read()
			entryfile.close()
			exempt = ['"Meta page"', '<div id="contentSub">(Redirected from', '"Publication"', '"Open Setting Licensed-content"', '"Guideline"'] #Markers of pages exempt from needing an ID
			if not any(marker in contents for marker in exempt):
				out.write(entry)
				out.write('\n')
out.close()
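
As a quick sanity check of the suffix logic, here's how HasAcceptableID treats a couple of titles (the sample page names are hypothetical):

print(HasAcceptableID('Frost_Giant_Jarl_(3.5e_Creature)')) #False - ends in '_Creature)', an acceptable ID
print(HasAcceptableID('Frost_Giant_Jarl')) #True - no ID suffix, so it would land in list.txt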