Révision f5cb48cd
Ajouté par Julien Enselme il y a presque 11 ans
scripts_divers/migrer_taches_vers_redmine/comments_parser.py | ||
---|---|---|
1 |
from html.parser import HTMLParser |
|
2 |
import re |
|
3 |
|
|
4 |
class LinksParser(HTMLParser):
    """Collect the text found inside ``<div class="comment-content">`` blocks.

    ``recording`` is the current ``<div>`` nesting depth counted from the
    matching div (0 means "outside of any matching div"); ``data`` is the
    list of text chunks seen while that depth is non-zero.
    """

    def __init__(self):
        super().__init__()
        self.recording = 0
        self.data = []

    def handle_starttag(self, tag, attributes):
        """Start recording on the target div, or go one level deeper."""
        if tag != 'div':
            return
        if self.recording:
            # Already inside a matching div: only track the nesting depth.
            self.recording += 1
        elif ('class', 'comment-content') in attributes:
            # The class attribute must be exactly 'comment-content'.
            self.recording = 1

    def handle_endtag(self, tag):
        """Leave one nesting level when a div closes while recording."""
        if self.recording and tag == 'div':
            self.recording -= 1

    def handle_data(self, data):
        """Store the text chunk when it lies inside a matching div."""
        if not self.recording:
            return
        self.data.append(data)
|
30 |
|
|
31 |
|
|
32 |
# Optional class/id attributes accepted on the tags rewritten below.
_COMMENT_TAG_ATTRS = r'( class="[a-z -]*")?( id="[a-z]*")?'

# (compiled pattern, replacement) pairs applied to every line, in this order.
_COMMENT_LINE_RULES = tuple(
    (re.compile(pattern), replacement)
    for pattern, replacement in (
        (r'<p' + _COMMENT_TAG_ATTRS + r'>', r''),
        (r'</p>', r''),
        (r'<h([1-9])' + _COMMENT_TAG_ATTRS + r'( rel="nofollow")?>\n?', r'\nh\1. '),
        (r'</h[1-9]>', r'\n'),
        (r'^[ \t]+', ''),
        (r'<br />', r'\n'),
        (r'<li' + _COMMENT_TAG_ATTRS + r'>', r''),
        (r'(.*)</li>', r'# \1'),
        (r'<ol' + _COMMENT_TAG_ATTRS + r'>', r''),
        (r'</ol>', r''),
        (r'<ul' + _COMMENT_TAG_ATTRS + r'>', r''),
        (r'</ul>', r''),
        # <pre>/<code> are swapped for plain-text placeholders so that they
        # end up in the text collected by the HTML parser; they are restored
        # once parsing is done.
        (r'<pre' + _COMMENT_TAG_ATTRS + r'>', r'balise_pre'),
        (r'</pre>', r'/balise_pre'),
        (r'<code' + _COMMENT_TAG_ATTRS + r'>', r'balise_code'),
        (r'</code>', r'/balise_code'),
        (r'<em' + _COMMENT_TAG_ATTRS + r'>', r'_'),
        (r'</em>', r'_'),
        (r'<b' + _COMMENT_TAG_ATTRS + r'>', r'*'),
        (r'</b>', r'*'),
        (r'<strong' + _COMMENT_TAG_ATTRS + r'>', r'*'),
        (r'</strong>', r'*'),
        (r'<a', r''),
        # Non-greedy on purpose: a greedy '.*' runs up to the LAST quote of
        # the line and mangles lines holding several links or attributes.
        (r'href="(.*?)"', r'\1'),
        (r'</a>', r''),
    )
)

# Placeholder -> tag, restored in this order (closing forms first, because
# 'balise_pre' is a substring of '/balise_pre').
_COMMENT_PLACEHOLDERS = (
    ('/balise_pre', '</pre>'),
    ('balise_pre', '<pre>'),
    ('/balise_code', '</code> '),
    ('balise_code', '<code> '),
)


def _convert_comment_line(ligne):
    """Apply every rewrite rule, in order, to one line of HTML."""
    for pattern, replacement in _COMMENT_LINE_RULES:
        ligne = pattern.sub(replacement, ligne)
    return ligne


def parse_comment(input):
    """Extract the comments of an HTML page as Textile-style text chunks.

    Every line of *input* is rewritten with regular expressions (paragraph
    and list containers are dropped, headings become ``hN. ``, list items
    become ``# ``, emphasis becomes ``_``/``*``, links are reduced to their
    href value followed by the remaining markup).  The rewritten markup is
    then fed to ``LinksParser`` and the text chunks it recorded are trimmed
    and returned.

    Args:
        input: iterable of ``str`` lines (for instance an open text file).

    Returns:
        The list of non-empty text chunks, with leading/trailing newlines
        and spaces removed and ``<pre>``/``<code>`` tags restored.

    Side effects:
        Writes the rewritten markup to a file named ``k`` in the current
        working directory.
    """
    output = "".join(_convert_comment_line(ligne) for ligne in input)

    # NOTE(review): this dump looks like a debugging aid; it is kept so the
    # function's observable behaviour is unchanged, but the file is now
    # closed deterministically.
    with open("k", 'w') as dump:
        dump.write(output)

    p = LinksParser()
    p.feed(output)

    list_comments = []
    for comment in p.data:
        # Newlines are trimmed first, then spaces, at both ends.
        comment = comment.strip('\n').strip(' ')
        for placeholder, tag in _COMMENT_PLACEHOLDERS:
            comment = comment.replace(placeholder, tag)
        if comment:
            list_comments.append(comment)
    return list_comments
scripts_divers/migrer_taches_vers_redmine/node_parser.py | ||
---|---|---|
1 |
from html.parser import HTMLParser |
|
2 |
import re |
|
3 |
|
|
4 |
# Progress label -> value stored under 'done_ratio' in the task dict.
DONE_RATIO = {
    'À commencer': 0,
    'Entamée': 20,
    'Bien avancé': 80,
    'Terminée (success)': 100,
    "Fermée (won't fix)": 100,
}

# Priority label -> value stored under 'priority_id' in the task dict.
PRIORITY = {
    '5 - Très basse': 3,
    '4 - Basse': 3,
    '3 - Moyenne': 4,
    '2 - Haute': 5,
    '1 - Très haute': 6,
}

# Status label -> value stored under 'status_id' in the task dict.
STATUS = {
    'En cours': 2,
    'Fermée': 5,
    'Rejetée': 6,
    'En pause': 7,
}
|
7 |
|
|
8 |
class LinksParser(HTMLParser):
    """Collect the text found inside ``<div class="node-content">`` blocks.

    ``recording`` is the current ``<div>`` nesting depth counted from the
    matching div (0 means "outside of any matching div"); ``data`` is the
    list of text chunks seen while that depth is non-zero.
    """

    def __init__(self):
        super().__init__()
        self.recording = 0
        self.data = []

    def handle_starttag(self, tag, attributes):
        """Start recording on the target div, or go one level deeper."""
        if tag != 'div':
            return
        if self.recording:
            # Already inside a matching div: only track the nesting depth.
            self.recording += 1
        elif ('class', 'node-content') in attributes:
            # The class attribute must be exactly 'node-content'.
            self.recording = 1

    def handle_endtag(self, tag):
        """Leave one nesting level when a div closes while recording."""
        if self.recording and tag == 'div':
            self.recording -= 1

    def handle_data(self, data):
        """Store the text chunk when it lies inside a matching div."""
        if not self.recording:
            return
        self.data.append(data)
|
34 |
|
|
35 |
# Optional class/id attributes accepted on the tags rewritten below.
_NODE_TAG_ATTRS = r'( class="[a-z -]*")?( id="[a-z]*")?'

# (compiled pattern, replacement) pairs applied to every line, in this order.
_NODE_LINE_RULES = tuple(
    (re.compile(pattern), replacement)
    for pattern, replacement in (
        (r'<p' + _NODE_TAG_ATTRS + r'>', r''),
        (r'</p>', r''),
        (r'<h([1-9])' + _NODE_TAG_ATTRS + r'( rel="nofollow")?>\n?', r'\nh\1. '),
        (r'</h[1-9]>', r'\n'),
        (r'^[ \t]+', ''),
        (r'<br />', r'\n'),
        (r'<li' + _NODE_TAG_ATTRS + r'>', r''),
        (r'(.*)</li>', r'# \1'),
        (r'<ol' + _NODE_TAG_ATTRS + r'>', r''),
        (r'</ol>', r''),
        (r'<ul' + _NODE_TAG_ATTRS + r'>', r''),
        (r'</ul>', r''),
        # <pre>/<code> are swapped for plain-text placeholders so that they
        # end up in the text collected by the HTML parser; they are restored
        # once parsing is done.
        (r'<pre' + _NODE_TAG_ATTRS + r'>', r'balise_pre'),
        (r'</pre>', r'/balise_pre'),
        (r'<code' + _NODE_TAG_ATTRS + r'>', r'balise_code'),
        (r'</code>', r'/balise_code'),
        (r'<em' + _NODE_TAG_ATTRS + r'>', r'_'),
        (r'</em>', r'_'),
        (r'<b' + _NODE_TAG_ATTRS + r'>', r'*'),
        (r'</b>', r'*'),
        (r'<strong' + _NODE_TAG_ATTRS + r'>', r'*'),
        (r'</strong>', r'*'),
        # Only the '<a' and '</a>' markers are stripped; the href attribute
        # text is left in place.
        (r'<a', r''),
        (r'</a>', r''),
    )
)

# Placeholder -> tag, restored in this order (closing forms first, because
# 'balise_pre' is a substring of '/balise_pre').
_NODE_PLACEHOLDERS = (
    ('/balise_pre', '</pre>'),
    ('balise_pre', '<pre>'),
    ('/balise_code', '</code> '),
    ('balise_code', '<code> '),
)


def _convert_node_line(ligne):
    """Apply every rewrite rule, in order, to one line of HTML."""
    for pattern, replacement in _NODE_LINE_RULES:
        ligne = pattern.sub(replacement, ligne)
    return ligne


def _collect_until(items, start, marker):
    """Return (items from *start* up to the first one containing *marker*, index of that one).

    Raises IndexError when no item from *start* onwards contains *marker*.
    """
    index = start
    collected = []
    while marker not in items[index]:
        collected.append(items[index])
        index += 1
    return collected, index


def parse_node(input):
    """Build a task dict from the HTML lines of a node page.

    The page title is taken from the first ``<title>...</title>`` found.
    Every line is rewritten with regular expressions into Textile-style
    markup, fed to ``LinksParser``, and the recorded text chunks are then
    walked in order using the "Avancement", "Priorité" and "Version"
    marker chunks to locate progress, priority, version and body.

    Args:
        input: iterable of ``str`` lines (for instance an open text file).

    Returns:
        A dict with the keys ``project_id``, ``tracker_id``, ``subject``,
        ``description``, ``priority_id``, ``done_ratio``, ``status_id`` and
        ``custom_fields``.

    Raises:
        IndexError: if no title is found or an expected marker/field chunk
            is missing.
        KeyError: if the priority or progress label is not a key of
            ``PRIORITY`` / ``DONE_RATIO``.
    """
    drupal_title = []
    converted = []
    for ligne in input:
        if not drupal_title:
            # Looked up on the raw line, before any rewriting.
            drupal_title = re.findall(r'<title>(.*?)</title>', ligne)
        converted.append(_convert_node_line(ligne))

    p = LinksParser()
    p.feed("".join(converted))

    list_node = []
    for chunk in p.data:
        # Newlines are trimmed first, then spaces, at both ends.
        chunk = chunk.strip('\n').strip(' ')
        for placeholder, tag in _NODE_PLACEHOLDERS:
            chunk = chunk.replace(placeholder, tag)
        if chunk:
            list_node.append(chunk)

    # Chunk 0 is skipped; the chunks before "Avancement" are walked over but
    # not used in the resulting task.
    _, i = _collect_until(list_node, 1, "Avancement")

    # Chunks between the "Avancement" and "Priorité" markers.
    drupal_done_ratio, i = _collect_until(list_node, i + 1, "Priorité")

    # Drop the "waiting for ..." and "to be tested" entries (to be added by
    # hand).  The list is rebuilt because `del elt` inside a for loop only
    # unbinds the loop variable and never removes anything from the list.
    drupal_done_ratio = [
        elt for elt in drupal_done_ratio
        if "tester" not in elt and "Attente" not in elt
    ]

    # The status is derived from the progress entries.
    # NOTE(review): 'En cours' is assumed to be the intended default; the
    # previous default '' is not a key of STATUS and raised KeyError for
    # every task that was not finished, closed or paused -- confirm.
    drupal_status = 'En cours'
    for elt in drupal_done_ratio:
        if "Terminée" in elt:
            drupal_status = 'Fermée'
            break
        elif "Fermée" in elt:
            drupal_status = 'Rejetée'
            break
        elif "pause" in elt:
            drupal_status = 'En pause'
            # The pause entry is not a progress label: remove it for real.
            # Safe while iterating because the loop stops right after.
            drupal_done_ratio.remove(elt)
            break

    # Make sure a progress label is left after the removals above.
    if not drupal_done_ratio:
        drupal_done_ratio = ['Entamée']

    # `i` sits on the "Priorité" marker; the priority label follows it.
    i += 1
    drupal_priority = list_node[i]

    # Chunks between the priority and the "Version" marker are walked over
    # but not used in the resulting task.
    _, i = _collect_until(list_node, i + 1, "Version")

    i += 1
    drupal_version = list_node[i]
    drupal_body = list_node[i + 1]

    tache = {
        'project_id': 1,
        'tracker_id': 2,
        'subject': drupal_title[0],
        'description': drupal_body,
        'priority_id': PRIORITY[drupal_priority],
        'done_ratio': DONE_RATIO[drupal_done_ratio[0]],
        'status_id': STATUS[drupal_status],
        # Custom field 1 is "1" when the version chunk contains '7', else "2".
        'custom_fields': [
            {"id": 1, "value": "1" if '7' in drupal_version else "2"},
        ],
    }
    return tache
scripts_divers/migrer_taches_vers_redmine/submiters_parser.py | ||
---|---|---|
1 |
from html.parser import HTMLParser |
|
2 |
|
|
3 |
class LinksParser(HTMLParser):
    """Collect the text found inside ``<a class="username">`` links.

    ``recording`` is the current ``<a>`` nesting depth counted from the
    matching link (0 means "outside of any matching link"); ``data`` is the
    list of text chunks seen while that depth is non-zero.
    """

    def __init__(self):
        super().__init__()
        self.recording = 0
        self.data = []

    def handle_starttag(self, tag, attributes):
        """Start recording on the target link, or go one level deeper."""
        if tag != 'a':
            return
        if self.recording:
            # Already inside a matching link: only track the nesting depth.
            self.recording += 1
        elif ('class', 'username') in attributes:
            # The class attribute must be exactly 'username'.
            self.recording = 1

    def handle_endtag(self, tag):
        """Leave one nesting level when a link closes while recording."""
        if self.recording and tag == 'a':
            self.recording -= 1

    def handle_data(self, data):
        """Store the text chunk when it lies inside a matching link."""
        if not self.recording:
            return
        self.data.append(data)
|
29 |
|
|
30 |
def parse_submiters(input):
    """Return the text of every ``<a class="username">`` link in *input*.

    The lines of *input* are concatenated, echoed to standard output and
    fed to ``LinksParser``; the text chunks it recorded are returned.

    Args:
        input: iterable of ``str`` lines (for instance an open text file).

    Returns:
        The list of recorded user-name chunks, in document order.
    """
    markup = "".join(input)
    # NOTE(review): the echo is assumed to run once, after all lines have
    # been gathered -- confirm against the original indentation.
    print(markup)

    parser = LinksParser()
    parser.feed(markup)

    # The first entry is the one who created the task and the last one is
    # potentially someone who has not posted any comment yet.
    return parser.data
Formats disponibles : Unified diff
Suppression des fichiers inutiles