Project

General

Profile

Paste
Download (5.19 KB) Statistics
| Branch: | Revision:

root / scripts_divers / migrer_taches_vers_redmine / node_parser.py @ 565596d9

1
from html.parser import HTMLParser
2
import re
3

    
4
# Lookup tables used by parse_tache() to translate the labels found in the
# source page into the numeric values stored in the resulting task dict.

# Progress label -> value stored under 'done_ratio'.
DONE_RATIO = {
    'À commencer': 0,
    'Entamée': 20,
    'Bien avancé': 80,
    'Terminée (success)': 100,
    "Fermée (won't fix)": 100,
}

# Priority label -> value stored under 'priority_id'.
# The two lowest source priorities share the same target value.
PRIORITY = {
    '5 - Très basse': 3,
    '4 - Basse': 3,
    '3 - Moyenne': 4,
    '2 - Haute': 5,
    '1 - Très haute': 6,
}

# Status label -> value stored under 'status_id'.
STATUS = {
    'En cours': 2,
    'Fermée': 5,
    'Rejetée': 6,
    'En pause': 7,
}
7

    
8
class LinksParser(HTMLParser):
    """Collect the text contained in ``<div class="node-content">`` elements.

    After ``feed()``, ``data`` holds every text chunk seen while the parser
    was inside such a div (nested divs included), in document order.
    """

    def __init__(self):
        super().__init__()
        # Nesting depth of <div> elements, counted from the enclosing
        # node-content div: 0 means "not recording", 1 means directly inside
        # the node-content div, 2+ means inside a nested div.
        self.recording = 0
        # Text chunks captured while recording.
        self.data = []

    def handle_starttag(self, tag, attributes):
        """Start recording on a node-content div; track nested div depth."""
        if tag != 'div':
            return
        if self.recording:
            # Already inside a node-content div: only remember the extra
            # nesting level so the matching </div> does not stop recording.
            self.recording += 1
            return
        # The class attribute must be exactly 'node-content'.
        is_node_content = any(
            name == 'class' and value == 'node-content'
            for name, value in attributes
        )
        if is_node_content:
            self.recording = 1

    def handle_endtag(self, tag):
        """Leave one div level; recording stops when the depth reaches 0."""
        if tag == 'div' and self.recording:
            self.recording -= 1

    def handle_data(self, data):
        """Store the text chunk when inside a node-content div."""
        if self.recording:
            self.data.append(data)
34

    
35
# Ordered (pattern, replacement) pairs turning the HTML markup of one line
# into Textile-like text.  The order is significant and must be kept.
# <pre>/<code> are replaced by textual placeholders so that they go through
# the HTML parser as plain text; _clean_fragment() restores them afterwards.
# NOTE(review): every <li> becomes a numbered item ("# "), including those
# coming from <ul> lists -- confirm this is intended.
_TEXTILE_SUBSTITUTIONS = tuple(
    (re.compile(pattern), replacement)
    for pattern, replacement in (
        (r'<p( class="[a-z -]*")?( id="[a-z]*")?>', r''),
        (r'</p>', r''),
        (r'<h([1-9])( class="[a-z -]*")?( id="[a-z]*")?( rel="nofollow")?>\n?', r'\nh\1. '),
        (r'</h[1-9]>', r'\n'),
        (r'^[ \t]+', ''),
        (r'<br />', r'\n'),
        (r'<li( class="[a-z -]*")?( id="[a-z]*")?>', r''),
        (r'(.*)</li>', r'# \1'),
        (r'<ol( class="[a-z -]*")?( id="[a-z]*")?>', r''),
        (r'</ol>', r''),
        (r'<ul( class="[a-z -]*")?( id="[a-z]*")?>', r''),
        (r'</ul>', r''),
        (r'<pre( class="[a-z -]*")?( id="[a-z]*")?>', r'balise_pre'),
        (r'</pre>', r'/balise_pre'),
        (r'<code( class="[a-z -]*")?( id="[a-z]*")?>', r'balise_code'),
        (r'</code>', r'/balise_code'),
        (r'<em( class="[a-z -]*")?( id="[a-z]*")?>', r'_'),
        (r'</em>', r'_'),
        (r'<b( class="[a-z -]*")?( id="[a-z]*")?>', r'*'),
        (r'</b>', r'*'),
        (r'<strong( class="[a-z -]*")?( id="[a-z]*")?>', r'*'),
        (r'</strong>', r'*'),
        # Only the "<a" opener and the "</a>" closer are dropped: the
        # attributes (href="...") and the trailing ">" stay in the text.
        (r'<a', r''),
        (r'</a>', r''),
    )
)


def _html_to_textile(ligne):
    """Apply every substitution of _TEXTILE_SUBSTITUTIONS, in order, to one line."""
    for pattern, replacement in _TEXTILE_SUBSTITUTIONS:
        ligne = pattern.sub(replacement, ligne)
    return ligne


def _clean_fragment(comment):
    """Trim one captured text chunk and restore its <pre>/<code> tags."""
    # Same order as the historical regexes: newlines first, then spaces.
    comment = comment.lstrip('\n').rstrip('\n').lstrip(' ').rstrip(' ')
    # Closing placeholders first: they contain the opening ones.
    comment = comment.replace('/balise_pre', '</pre>').replace('balise_pre', '<pre>')
    comment = comment.replace('/balise_code', '</code> ').replace('balise_code', '<code> ')
    return comment


def _index_of_marker(nodes, start, marker):
    """Return the index of the first item of nodes[start:] containing marker.

    Raises IndexError when no such item exists (same exception type as the
    unbounded scan this helper replaces).
    """
    for position in range(start, len(nodes)):
        if marker in nodes[position]:
            return position
    raise IndexError('marker %r not found in the page content' % marker)


def parse_tache(input):
    """Build a task dict from the lines of an HTML task page.

    Each line of ``input`` is converted from HTML to Textile-like markup,
    the text of the ``node-content`` divs is extracted with LinksParser, and
    the resulting non-empty chunks are read positionally:

        [0]                    ignored
        [1 .. "Avancement")    ignored (collected but never used before)
        ("Avancement" .. "Priorité")   progress / state labels
        next item              priority label
        (.. "Version")         ignored (attachments are not migrated)
        next item              version label
        next item              task body

    Args:
        input: iterable of text lines (for instance an open text file).

    Returns:
        dict with the keys 'project_id', 'tracker_id', 'subject',
        'description', 'priority_id', 'done_ratio', 'status_id' and
        'custom_fields'.

    Raises:
        IndexError: no <title> was found on a single line, or one of the
            expected markers / following items is missing.
        KeyError: the priority or progress label is not a key of PRIORITY /
            DONE_RATIO.
    """
    p = LinksParser()
    drupal_title = []
    lignes = []
    for ligne in input:
        # The title is searched in the raw line, before any substitution.
        if not drupal_title:
            drupal_title = re.findall(r'<title>(.*?)</title>', ligne)
        lignes.append(_html_to_textile(ligne))
    p.feed(''.join(lignes))

    list_node = [
        comment for comment in map(_clean_fragment, p.data) if comment
    ]

    avancement = _index_of_marker(list_node, 1, "Avancement")
    priorite = _index_of_marker(list_node, avancement + 1, "Priorité")

    # Drop the "waiting" / "to be tested" labels (to be re-added by hand):
    # they have no entry in DONE_RATIO.  The previous `del elt` only unbound
    # the loop variable and left the list untouched.
    drupal_done_ratio = [
        elt for elt in list_node[avancement + 1:priorite]
        if "tester" not in elt and "Attente" not in elt
    ]

    # The status is derived from the same labels.  A task that is neither
    # finished, closed nor paused is in progress; the former default ('')
    # was not a key of STATUS and always raised KeyError.
    drupal_status = 'En cours'
    for elt in drupal_done_ratio:
        if "Terminée" in elt:
            drupal_status = 'Fermée'
            break
        elif "Fermée" in elt:
            drupal_status = 'Rejetée'
            break
        elif "pause" in elt:
            drupal_status = 'En pause'
            # A pause label is a status, not a progress value: really remove
            # it (safe, the loop stops right after).
            drupal_done_ratio.remove(elt)
            break

    # Make sure a progress label is left.
    if not drupal_done_ratio:
        drupal_done_ratio = ['Entamée']

    drupal_priority = list_node[priorite + 1]

    version = _index_of_marker(list_node, priorite + 2, "Version")
    drupal_version = list_node[version + 1]
    drupal_body = list_node[version + 2]

    return {
        'project_id': 1,
        'tracker_id': 2,
        'subject': drupal_title[0],
        'description': drupal_body,
        'priority_id': PRIORITY[drupal_priority],
        'done_ratio': DONE_RATIO[drupal_done_ratio[0]],
        'status_id': STATUS[drupal_status],
        # Custom field 1 depends on whether the version label mentions "7".
        'custom_fields': [
            {"id": 1, "value": "1" if '7' in drupal_version else "2"}
        ],
    }