Projet

Général

Profil

Paste
Télécharger (1,05 ko) Statistiques
| Branche: | Révision:

root / scripts_divers / migrer_taches_vers_redmine / url_parser.py @ 565596d9

1
#!/usr/bin/env python3
2

    
3
from html.parser import HTMLParser
4

    
5
# Base URL (scheme + host, no trailing slash) that give_urls() prefixes to
# '/node/<nid>' when building URLs.
SITE_URL = 'http://assos.centrale-marseille.fr'
6

    
7
class LinksParser(HTMLParser):
    """Collect the text found inside ``<span class="parse_me">`` elements.

    After feeding HTML to the parser, ``data`` holds every text fragment seen
    while inside a matching span (including text of spans nested in it), in
    document order.
    """

    def __init__(self):
        super().__init__()
        # Nesting depth of <span> elements counted from the matching span;
        # 0 means we are not currently inside a matching span.
        self.recording = 0
        # Text fragments captured so far.
        self.data = []

    def handle_starttag(self, tag, attributes):
        """Start recording on a matching span, or track a nested span."""
        if tag != 'span':
            return
        if self.recording:
            # Already inside a matching span: count the nested span so that
            # its closing tag does not end the recording too early.
            self.recording += 1
            return
        # The class attribute must be exactly 'parse_me' (no extra classes).
        if any(name == 'class' and value == 'parse_me'
               for name, value in attributes):
            self.recording = 1

    def handle_endtag(self, tag):
        """Leave one span nesting level while recording."""
        if tag == 'span' and self.recording:
            self.recording -= 1

    def handle_data(self, data):
        """Store text encountered while inside a matching span."""
        if self.recording:
            self.data.append(data)
33

    
34
def give_nids(path='tache.html'):
    """Return the text fragments captured by LinksParser from an HTML file.

    Args:
        path: HTML file to parse; defaults to 'tache.html' in the current
            working directory.

    Returns:
        The parser's ``data`` list, i.e. the text found inside
        ``<span class="parse_me">`` elements. These are presumably node ids
        (they are what give_urls() expects) -- confirm against the input file.
    """
    parser = LinksParser()

    # NOTE(review): the file is decoded with the platform default encoding;
    # pass an explicit encoding here if the expected one is known.
    with open(path, 'r') as html_file:
        parser.feed(html_file.read())
    return parser.data
40

    
41
def give_urls(nids):
    """Build the node URL for each id.

    Args:
        nids: iterable of node ids as strings.

    Returns:
        A list of ``SITE_URL + '/node/' + nid`` strings, in input order.
    """
    return [SITE_URL + '/node/' + nid for nid in nids]