1
|
from html.parser import HTMLParser
|
2
|
import re
|
3
|
|
4
|
# Progress label (as it appears in the source page) -> completion percentage
# stored in the task's ``done_ratio`` field.
DONE_RATIO = {
    'À commencer': 0,
    'Entamée': 20,
    'Bien avancé': 80,
    'Terminée (success)': 100,
    'Fermée (won\'t fix)': 100,
}

# Priority label -> numeric id stored in the task's ``priority_id`` field.
# The two lowest labels intentionally share the same id.
PRIORITY = {
    '5 - Très basse': 3,
    '4 - Basse': 3,
    '3 - Moyenne': 4,
    '2 - Haute': 5,
    '1 - Très haute': 6,
}

# Status label -> numeric id stored in the task's ``status_id`` field.
STATUS = {
    'En cours': 2,
    'Fermée': 5,
    'Rejetée': 6,
    'En pause': 7,
}
|
7
|
|
8
|
class LinksParser(HTMLParser):
    """Collect the text found inside ``<div class="node-content">`` elements.

    Text chunks are appended to ``self.data`` in document order.  Nested
    ``<div>`` elements inside a recorded region are tracked with a depth
    counter so recording only stops when the outermost matching ``<div>``
    is closed.
    """

    def __init__(self):
        super().__init__()
        # Nesting depth of <div> elements inside the recorded region;
        # 0 means "currently not recording".
        self.recording = 0
        # Text chunks captured while recording.
        self.data = []

    def handle_starttag(self, tag, attributes):
        """Start recording on a node-content div, or track a nested div."""
        if tag != 'div':
            return
        if self.recording:
            # Already inside the region: just go one level deeper.
            self.recording += 1
        elif any(
            name == 'class' and value == 'node-content'
            for name, value in attributes
        ):
            # The class attribute must be exactly "node-content".
            self.recording = 1

    def handle_endtag(self, tag):
        """Leave one nesting level when a div closes inside the region."""
        if tag != 'div' or not self.recording:
            return
        self.recording -= 1

    def handle_data(self, data):
        """Store text only while inside a recorded region."""
        if not self.recording:
            return
        self.data.append(data)
|
34
|
|
35
|
# Ordered (compiled pattern, replacement) pairs applied to every input line
# before it is handed to the HTML parser.  Order matters: later rules see the
# output of earlier ones.  <pre>/<code> are turned into plain-text
# placeholders so they survive HTML parsing and can be restored afterwards.
_LINE_REWRITES = tuple(
    (re.compile(pattern), replacement)
    for pattern, replacement in (
        (r'<p( class="[a-z -]*")?( id="[a-z]*")?>', r''),
        (r'</p>', r''),
        (r'<h([1-9])( class="[a-z -]*")?( id="[a-z]*")?( rel="nofollow")?>\n?', r'\nh\1. '),
        (r'</h[1-9]>', r'\n'),
        (r'^[ \t]+', ''),
        (r'<br />', r'\n'),
        (r'<li( class="[a-z -]*")?( id="[a-z]*")?>', r''),
        (r'(.*)</li>', r'# \1'),
        (r'<ol( class="[a-z -]*")?( id="[a-z]*")?>', r''),
        (r'</ol>', r''),
        (r'<ul( class="[a-z -]*")?( id="[a-z]*")?>', r''),
        (r'</ul>', r''),
        (r'<pre( class="[a-z -]*")?( id="[a-z]*")?>', r'balise_pre'),
        (r'</pre>', r'/balise_pre'),
        (r'<code( class="[a-z -]*")?( id="[a-z]*")?>', r'balise_code'),
        (r'</code>', r'/balise_code'),
        (r'<em( class="[a-z -]*")?( id="[a-z]*")?>', r'_'),
        (r'</em>', r'_'),
        (r'<b( class="[a-z -]*")?( id="[a-z]*")?>', r'*'),
        (r'</b>', r'*'),
        (r'<strong( class="[a-z -]*")?( id="[a-z]*")?>', r'*'),
        (r'</strong>', r'*'),
        (r'<a', r''),
        (r'</a>', r''),
    )
)

_TITLE_RE = re.compile(r'<title>(.*?)</title>')


def _collect_until(items, start, marker):
    """Return ``(items[start:end], end)`` where ``items[end]`` contains *marker*.

    Raises IndexError if no entry from *start* onwards contains *marker*.
    """
    end = start
    while marker not in items[end]:
        end += 1
    return items[start:end], end


def parse_node(input):
    """Build a task dictionary from the lines of an HTML page.

    Each line is first rewritten with ``_LINE_REWRITES`` (HTML tags replaced
    by lightweight markup -- the output looks like Textile; TODO confirm the
    target syntax), then the text inside ``<div class="node-content">`` is
    extracted with ``LinksParser``.  The resulting non-empty text chunks are
    read positionally, using the entries containing "Avancement",
    "Priorité" and "Version" as separators.
    NOTE(review): this layout is assumed from the parsing order below;
    verify against a real page.

    Args:
        input: iterable of strings, one per line of the page.

    Returns:
        dict with keys ``project_id``, ``tracker_id``, ``subject``,
        ``description``, ``priority_id``, ``done_ratio``, ``status_id`` and
        ``custom_fields`` (presumably a Redmine issue payload -- confirm
        with the caller).

    Raises:
        IndexError: if no ``<title>`` is found or an expected separator
            entry is missing.
        KeyError: if the priority or progress label is not a key of
            ``PRIORITY`` / ``DONE_RATIO``.
    """
    drupal_title = ''
    converted = []
    for ligne in input:
        if not drupal_title:
            # Keeps the findall() list of the first line holding a <title>.
            drupal_title = _TITLE_RE.findall(ligne)
        for pattern, replacement in _LINE_REWRITES:
            ligne = pattern.sub(replacement, ligne)
        converted.append(ligne)

    parser = LinksParser()
    parser.feed(''.join(converted))

    list_node = []
    for comment in parser.data:
        # Strip surrounding newlines first, then surrounding spaces.
        comment = comment.lstrip('\n').rstrip('\n').lstrip(' ').rstrip(' ')
        # Restore the tags that were hidden behind placeholders; the closing
        # placeholder must be handled first because it contains the opening
        # one as a substring.
        comment = comment.replace('/balise_pre', '</pre>')
        comment = comment.replace('balise_pre', '<pre>')
        comment = comment.replace('/balise_code', '</code> ')
        comment = comment.replace('balise_code', '<code> ')
        if comment:
            list_node.append(comment)

    # Entry 0 and everything up to the "Avancement" entry are skipped.
    _, i = _collect_until(list_node, 1, "Avancement")
    # Progress labels sit between the "Avancement" and "Priorité" entries.
    drupal_done_ratio, i = _collect_until(list_node, i + 1, "Priorité")

    # Drop labels that carry no completion ratio.  The original code used
    # ``del elt`` here, which only unbinds the loop variable and left the
    # list untouched.
    drupal_done_ratio = [
        elt for elt in drupal_done_ratio
        if "tester" not in elt and "Attente" not in elt
    ]

    # Default to the open status: '' is not a key of STATUS, so tasks that
    # were neither closed, rejected nor paused used to raise KeyError.
    drupal_status = 'En cours'
    for elt in drupal_done_ratio:
        if "Terminée" in elt:
            drupal_status = 'Fermée'
            break
        if "Fermée" in elt:
            drupal_status = 'Rejetée'
            break
        if "pause" in elt:
            drupal_status = 'En pause'
            # The pause label is a status, not a ratio: really remove it.
            # Safe while iterating because the loop is left immediately.
            drupal_done_ratio.remove(elt)
            break

    if not drupal_done_ratio:
        drupal_done_ratio = ['Entamée']

    # The priority value is the entry right after the "Priorité" entry.
    i += 1
    drupal_priority = list_node[i]

    # Skip everything between the priority and the "Version" entry.
    _, i = _collect_until(list_node, i + 1, "Version")

    i += 1
    drupal_version = list_node[i]
    drupal_body = list_node[i + 1]

    tache = {
        'project_id': 1,
        'tracker_id': 2,
        'subject': drupal_title[0],
        'description': drupal_body,
        'priority_id': PRIORITY[drupal_priority],
        'done_ratio': DONE_RATIO[drupal_done_ratio[0]],
        'status_id': STATUS[drupal_status],
    }
    # Custom field 1 is "1" when the version text contains a '7', else "2".
    if '7' in drupal_version:
        tache['custom_fields'] = [{"id": 1, "value": "1"}]
    else:
        tache['custom_fields'] = [{"id": 1, "value": "2"}]

    return tache
|