I am trying to download files from the internet for research purposes, but when I moved the script from Python 2 to Python 3 I got the error `TypeError: a bytes-like object is required, not 'str'`,
because Python 3 treats strings differently from bytes. To get past it I had to change `.content`
to `.text`,
and that fixed the error, but now the script does not download the files even though it is grabbing their URLs. How can I force it to download them?
def downloadFile(self, url):
    """Download *url* into the output directory if it starts with the signature.

    The first ``len(self.signature)`` bytes of the response are compared with
    ``self.signature`` (assumed to be a sequence of ints in ``range(256)``,
    since the original compared each entry against ``ord()`` of one byte).
    On a match the whole response is saved as
    ``<outputDir>/file<successCount:08d>.<extension>`` and
    ``self.successCount`` is incremented.

    Any failure (network error, signature mismatch, write error) is treated
    as best-effort: a partially written file is removed and the method
    returns without incrementing the counter.  ``KeyboardInterrupt`` is not
    caught and propagates to the caller.

    Raises:
        OSError: if the output directory cannot be created, or if a partial
            file still cannot be removed after ten attempts.
    """
    fDir = self.outputDir
    os.makedirs(fDir, exist_ok=True)
    target = "%s/file%08d.%s" % (fDir, self.successCount, self.extension)

    try:
        # The response body is bytes under Python 3, so the signature has to
        # be compared and written as bytes as well.  Writing chr(...) (a str)
        # to a file opened in "wb" mode raised TypeError, which was then
        # swallowed, so no file was ever kept.
        expected = bytes(self.signature)
        with urllib.request.urlopen(url, timeout=10) as response:
            header = response.read(len(expected))
            if header != expected:
                # Not the file type we are looking for (or a short response).
                return
            with open(target, "wb") as local_file:
                local_file.write(header)
                # Stream in chunks so large files are not held in memory.
                for chunk in iter(lambda: response.read(65536), b""):
                    local_file.write(chunk)
    except Exception:
        # Best-effort download: drop whatever was partially written.  The
        # removal is retried because the file may be briefly locked.
        for attempt in range(10):
            try:
                if os.path.isfile(target):
                    os.remove(target)
                break
            except OSError:
                if attempt == 9:
                    raise
                time.sleep(1)
        return

    self.successCount += 1
def search(self):
    """Print the search configuration and start the search.

    Returns early with an error message when no extension is configured.
    An empty signature only produces a warning.  After the settings banner
    is printed, the search is started via ``self.searchReal("")``.
    """
    # Guard: an extension is mandatory.
    if self.extension in (None, ""):
        print("ERROR: No extension specified!")
        return

    # An empty signature is allowed, but the user is warned about it.
    if not len(self.signature):
        print("WARNING: No signature specified - THERE WILL BE LOT OF FALSE RESULTS :(")

    # Settings banner, one line per configured value.
    print("Starting with search")
    print("---------------------")
    print("Extension: " + self.extension)
    print("Signature: " + self.signatureText())
    print("Starting search base: " + self.searchCharsText())
    print("Output dir: " + self.outputDir)
    print("Max results per search: " + str(self.maxPerSearch))

    self.searchReal("")
# Fragment: scan the response text for anchor tags and download each external
# link found.  `r`, `f` and `blocked` are defined outside this excerpt
# (presumably `r` is an HTTP response and `f` an open URL log file -- confirm).
# NOTE(review): indentation was lost in this paste; the position update at the
# end is presumably the tail of the while body -- confirm.
pos=r.text.find('<a href="')
while pos != -1:
# pos+16 skips the 16 characters of '<a href="/url?q=', so this assumes every
# hit is a "/url?q=" redirect link even though the first search only matches
# '<a href="' -- TODO confirm.
# The URL ends at the closing quote or the first '&', whichever comes first.
pos2_a=r.text.find('"', pos+16)
pos2_b=r.text.find('&', pos+16)
if pos2_a == -1:
pos2 = pos2_b
elif pos2_b == -1:
pos2 = pos2_a
else:
pos2 = min (pos2_a, pos2_b)
# Neither terminator found: stop scanning.
if pos2 == -1:
break;
url = r.text[pos+16:pos2]
# Keep only absolute http(s) links that do not point to a Google host.
# NOTE(review): the URL is used exactly as it appears in the page text (no
# unquoting); the "%2520" in the log suggests it may still be
# percent-encoded -- confirm whether urllib.parse.unquote is needed.
if url.find('.google.') == -1 and url.startswith('http'):
# presumably tells the caller the search returned real results -- verify
blocked = False
# Download each URL once, remember it, and record it in `f`.
if url not in self.downloaded:
self.downloadFile(url)
self.downloaded.append(url)
f.write(url + "\n")
# Advance to the next anchor after the current one.
# NOTE(review): a '<a href="/url?q=' tag matches pos_a one character before
# pos_b, so pos_b only wins when the leading '<' is absent, in which case
# pos+16 would skip the first URL character -- confirm intent.
pos_a=r.text.find('<a href="', pos+1)
pos_b=r.text.find('a href="/url?q=', pos+1)
if pos_a == -1:
pos = pos_b
elif pos_b == -1:
pos = pos_a
else:
pos=min(pos_a, pos_b)
log
http://www.aamalaysia.org/pdf/p-1_thisisaa1.pdf
https://www.deanza.edu/articulation/documents/ge-aa-as-dac.pdf
https://aamexico.org.mx/media/Lista_de_precios_%2520vigentes.pdf
https://www.aflglobal.com/productlist/Product-Lines/Conductor-Accessories/230kV-Aluminum-Welded-Bus-Pipe-Supports/doc/230kv-aluminum-welded-bus-supports.aspx