/[volute]/trunk/projects/WebAssets/tools/docrepoToADS/harvest.py
ViewVC logotype

Diff of /trunk/projects/WebAssets/tools/docrepoToADS/harvest.py

Parent Directory | Revision Log | View Patch

revision 5327 by msdemlei, Fri Jan 18 13:00:37 2019 UTC | revision 5328 by msdemlei, Thu Mar 14 13:22:27 2019 UTC
# Line 51  Line 51 
51  import re  import re
52  import sys  import sys
53  import traceback  import traceback
 import urllib  
54  import urlparse  import urlparse
55    
56  import BeautifulSoup  import BeautifulSoup
# Line 97  Line 96 
96                  self.payload = payload                  self.payload = payload
97                  Exception.__init__(self, "Unexpected div")                  Exception.__init__(self, "Unexpected div")
98    
 class AppURLopener(urllib.FancyURLopener):  
     version = "IVOA-ADS bridge"  
   
 urllib._urlopener = AppURLopener()  
   
99    
100  def get_with_cache(url):  def get_with_cache(url):
101          cacheName = re.sub("[^\w]+", "", url)+".cache"          cacheName = re.sub("[^\w]+", "", url)+".cache"
102          if CACHE_RESULTS and os.path.exists(cacheName):          if CACHE_RESULTS and os.path.exists(cacheName):
103                  doc = open(cacheName).read()                  doc = open(cacheName).read().decode("utf-8")
104          else:          else:
105                  doc = urllib.urlopen(url).read()                  doc = requests.get(url).text
106                  if CACHE_RESULTS:                  if CACHE_RESULTS:
107                          f = open(cacheName, "w")                          f = open(cacheName, "w")
108                          f.write(doc)                          f.write(doc.encode("utf-8"))
109                          f.close()                          f.close()
110          return doc          return doc
111    
# Line 531  Line 525 
525                  from the index at root_url.                  from the index at root_url.
526                  """                  """
527                  doc_index = BeautifulSoup.BeautifulSoup(                  doc_index = BeautifulSoup.BeautifulSoup(
528                          urllib.urlopen(root_url).read())                          requests.get(root_url).text)
529                  docs = []                  docs = []
530                                    
531                  for url in itertools.chain(                  for url in itertools.chain(

Legend:
Removed from v.5327  
changed lines
  Added in v.5328

msdemlei@ari.uni-heidelberg.de
ViewVC Help
Powered by ViewVC 1.1.26