Projet

Général

Profil

Paste
Télécharger (1,12 ko) Statistiques
| Branche: | Révision:

root / scripts_divers / migrer_taches_vers_redmine / url_parser.py @ 8a509595

1
#!/usr/bin/env python3
2

    
3
from html.parser import HTMLParser
4
import httplib2
5

    
6

    
7
class LinksParser(HTMLParser):
    """Collect the text found inside ``<span class="parse_me">`` elements.

    Every text fragment reported by the parser while inside such a span
    (including text inside spans nested within it) is appended to
    ``self.data`` in document order.
    """

    def __init__(self):
        super().__init__()
        # Nesting depth of <span> elements since the opening "parse_me" span;
        # 0 means we are not inside one and text is ignored.
        self.recording = 0
        # Text fragments captured so far.
        self.data = []

    def handle_starttag(self, tag, attributes):
        """Start recording on a ``parse_me`` span, or track a nested span.

        ``attributes`` is the list of ``(name, value)`` pairs supplied by
        ``HTMLParser``.
        """
        if tag != 'span':
            return
        if self.recording:
            # Already inside a recorded span: count the nested one so that
            # its closing tag does not end the recording prematurely.
            self.recording += 1
            return
        # Only a class attribute that is exactly "parse_me" opens a recording.
        if ('class', 'parse_me') in attributes:
            self.recording = 1

    def handle_endtag(self, tag):
        """Leave one level of span nesting while recording."""
        if tag == 'span' and self.recording:
            self.recording -= 1

    def handle_data(self, data):
        """Store ``data`` when it occurs inside a recorded span."""
        if self.recording:
            self.data.append(data)
33

    
34
def give_nids(url):
    """Download ``url`` and return the text of its ``parse_me`` spans.

    The page is fetched with an HTTP GET through ``httplib2``, decoded as
    UTF-8 and fed to a ``LinksParser``.

    Args:
        url: Address of the HTML page to scan.

    Returns:
        The list of text fragments collected by the parser; judging by the
        function name these are presumably node ids (confirm against the
        pages being scraped).

    Raises:
        UnicodeDecodeError: If the response body is not valid UTF-8.
    """
    parser = LinksParser()
    http = httplib2.Http()

    # The response headers/status are not inspected; only the body is parsed.
    _response, content = http.request(url, 'GET')

    parser.feed(content.decode('utf-8'))
    # Force processing of anything the parser is still buffering so that no
    # trailing text is lost before the result is read.
    parser.close()
    return parser.data
43

    
44
def give_json_urls(url, base_url):
    """Build the ``.json`` node URLs for every id found on the page at ``url``.

    Args:
        url: Page passed to ``give_nids`` to obtain the ids.
        base_url: Prefix placed before ``/node/<nid>.json``.

    Returns:
        A ``(nids, tache_urls)`` tuple: the ids as returned by ``give_nids``
        and, in the same order, the URL built for each of them.
    """
    nids = give_nids(url)
    tache_urls = [base_url + '/node/' + nid + '.json' for nid in nids]
    return nids, tache_urls