Projet

Général

Profil

Paste
Télécharger (1,2 ko) Statistiques
| Branche: | Révision:

root / scripts_divers / migrer_taches_vers_redmine / url_parser.py @ 65ce03da

1 565596d9 jenselme
#!/usr/bin/env python3
2
3
from html.parser import HTMLParser
4 8a509595 jenselme
import httplib2
5 565596d9 jenselme
6
7
class LinksParser(HTMLParser):
    """Collect the text found inside ``<span>`` elements of class ``parse_me``.

    After HTML has been fed to the parser, ``data`` holds every text chunk
    that appeared inside a marked span, in document order. Spans nested
    inside a marked span are counted so that recording only stops when the
    outermost marked span is closed.
    """

    def __init__(self):
        super().__init__()
        # Depth of <span> nesting inside the marked span being recorded;
        # 0 means no marked span is currently open.
        self.recording = 0
        # Text chunks collected so far.
        self.data = []

    def handle_starttag(self, tag, attributes):
        """Start recording on a marked span, or track a nested span."""
        if tag != 'span':
            return
        if self.recording:
            # Already inside a marked span: remember the extra nesting level
            # so its closing tag does not end the recording early.
            self.recording += 1
            return
        # The class attribute is a whitespace-separated token list, so
        # 'parse_me' is looked up among the tokens. A class attribute without
        # a value is reported as None and never matches.
        is_marked = any(
            name == 'class' and value and 'parse_me' in value.split()
            for name, value in attributes
        )
        if is_marked:
            self.recording = 1

    def handle_endtag(self, tag):
        """Leave one level of span nesting while recording."""
        if tag == 'span' and self.recording:
            self.recording -= 1

    def handle_data(self, data):
        """Store the text chunk when it lies inside a marked span."""
        if self.recording:
            self.data.append(data)
34
35 8a509595 jenselme
def give_nids(url):
    """Fetch *url* and return the text chunks found in its marked spans.

    The page is requested with an HTTP GET through httplib2, its body is
    decoded as UTF-8 and fed to a ``LinksParser``.

    Returns:
        The parser's ``data`` list: one string per text chunk collected by
        ``LinksParser`` (presumably the node ids, going by the function
        name -- confirm against the scraped page).

    Raises:
        UnicodeDecodeError: if the response body is not valid UTF-8.
    """
    parser = LinksParser()
    http = httplib2.Http()

    # Only the body is used; the response object is not inspected.
    _resp, content = http.request(url, 'GET')
    parser.feed(content.decode('utf-8'))
    # close() forces the parser to process any text still buffered at the
    # end of the input.
    parser.close()
    return parser.data
44
45 8a509595 jenselme
def give_json_urls(url, base_url):
    """Build the JSON URL of every nid collected from the page at *url*.

    Args:
        url: Address of the page passed to ``give_nids``.
        base_url: Prefix the ``/node/<nid>.json`` path is appended to.

    Returns:
        A ``(nids, tache_urls)`` tuple where ``tache_urls[i]`` is
        ``base_url + '/node/' + nids[i] + '.json'``.
    """
    nids = give_nids(url)
    tache_urls = [base_url + '/node/' + nid + '.json' for nid in nids]
    # The nids are returned too because they are needed later on.
    return nids, tache_urls