Projet

Général

Profil

Paste
Télécharger (1,2 ko) Statistiques
| Branche: | Révision:

root / scripts_divers / migrer_taches_vers_redmine / url_parser.py @ a2a6f2e9

1
#!/usr/bin/env python3
2

    
3
from html.parser import HTMLParser
4
import httplib2
5

    
6

    
7
class LinksParser(HTMLParser):
    """HTML parser collecting the text inside ``<span class="parse_me">``.

    After ``feed()``, ``data`` holds, in document order, every text chunk
    seen while inside such a span, including text of elements nested in it.
    """

    def __init__(self):
        super().__init__()
        # Span nesting depth inside the current "parse_me" span;
        # 0 means that nothing is being recorded.
        self.recording = 0
        # Text chunks collected so far.
        self.data = []

    def handle_starttag(self, tag, attributes):
        """Start recording on a "parse_me" span and track nested spans."""
        if tag != 'span':
            return
        if self.recording:
            # A span nested in a recorded one: count it so that its closing
            # tag does not end the recording prematurely.
            self.recording += 1
            return
        # Recording starts only when the class attribute is exactly
        # 'parse_me' (no multi-class matching).
        is_target = any(
            name == 'class' and value == 'parse_me'
            for name, value in attributes
        )
        if is_target:
            self.recording = 1

    def handle_endtag(self, tag):
        """Leave one level of span nesting while recording."""
        if tag == 'span' and self.recording:
            self.recording -= 1

    def handle_data(self, data):
        """Store the text chunk when inside a recorded span."""
        if self.recording:
            self.data.append(data)
34

    
35
def give_nids(url):
    """Fetch ``url`` and return the text chunks of its "parse_me" spans.

    The page is retrieved with an HTTP GET, decoded as UTF-8 and fed to a
    ``LinksParser``; the parser's ``data`` list is returned. Judging by the
    function name these chunks are node ids ("nids") -- confirm against the
    scraped page.
    """
    parser = LinksParser()
    http = httplib2.Http()

    # Only the body is used; the response object is not inspected.
    _, content = http.request(url, 'GET')

    parser.feed(content.decode('utf-8'))
    # Flush any text the parser still buffers (e.g. a document ending
    # without a closing tag) so that a trailing chunk is not dropped.
    parser.close()
    return parser.data
44

    
45
def give_json_urls(url, base_url):
    """Return the nids scraped from ``url`` and their matching JSON URLs.

    Returns a tuple ``(nids, tache_urls)``: ``nids`` is the list produced by
    ``give_nids(url)`` and ``tache_urls`` contains, for each nid, the string
    ``base_url + '/node/' + nid + '.json'``.
    """
    nids = give_nids(url)
    tache_urls = [base_url + '/node/' + nid + '.json' for nid in nids]
    # The nids are returned too because they are needed later on.
    return nids, tache_urls