-
Notifications
You must be signed in to change notification settings - Fork 10
/
sedsed.py
executable file
·1245 lines (1037 loc) · 40.1 KB
/
sedsed.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/env python
# sedsed - Debugger and code formatter for sed scripts
# Since 27 November 2001, by Aurelio Jargas
# pylint: disable=invalid-name
# pylint: disable=redefined-outer-name
# pylint: disable=too-many-branches
# pylint: disable=too-many-lines
# pylint: disable=too-many-locals
# pylint: disable=too-many-statements
from __future__ import print_function # pylint: disable=unused-variable
import sys
import re
import os
import getopt
import tempfile
# sedparse is a translation to Python of the GNU sed parser C code
# https://github.com/aureliojargas/sedparse
import sedparse
# Program identity, used in the --version output, in error messages and in
# the footer of the generated HTML / debug script.
__version__ = "2.0.0"
myname = "sedsed"
myhome = "https://aurelio.net/projects/sedsed/"
# Default config
# These module-level values are overwritten by parse_command_line() according
# to the command line options informed by the user.
# fmt: off
sedbin = "sed" # name (or full path) of the sed program
color = 1 # colored output or not? (--color, --nocolor)
dump_debug = 0 # dump debug script to screen? (--dump-debug)
indent_prefix = " "*4 # default indent prefix for blocks (--prefix)
debug_prefix = "\t\t" # default prefix for debug commands
action = "indent" # default action if none specified (-d,-i,-t,-H)
DEBUG = 0 # set developer's debug level [0-3]
# fmt: on
# HTML data for --htmlize
# You may edit here to change the defaults
# Maps each sed command element (and the page-level BGCOLOR/TEXT/LINK/ALINK/
# VLINK attributes) to an HTML color. Elements mapped to an empty string are
# not wrapped in a <font> tag by paint_html().
html_colors = {
    # fmt: off
    "addr1": "#8080ff",
    "addr1flag": "#ff6060",
    "addr2": "#8080ff",
    "addr2flag": "#ff6060",
    "lastaddr": "",
    "modifier": "#ff6060",
    "id": "#ffff00",
    "content": "#ff00ff",
    "delimiter": "#ff6060",
    "pattern": "#8080ff",
    "replace": "",
    "flag": "#00ff00",
    "comment": "#00ffff",
    "escape": "#ff6060",
    "special": "#00ff00",
    "pattmeta": "#ff00ff",
    "plaintext": "",
    "branch": "",
    "BGCOLOR": "#000000",
    "TEXT": "#ffffff",
    "LINK": "#ff00ff",
    "ALINK": "#ff00ff",
    "VLINK": "#ff00ff"
}
# Note that the %s will be expanded later
# (dump_script() fills it with the basename of the sed script file).
# The {NAME} placeholders are filled right here, from html_colors.
html_header = """\
<html>
<head><meta name="Generator" content="sedsed --htmlize">
<title>Colorized %s</title></head>
<body bgcolor="{BGCOLOR}" text="{TEXT}"
link="{LINK}" alink="{ALINK}" vlink="{VLINK}">
<pre>\
""".format(
    **html_colors
)
# Closing part of the HTML page: a credits line painted with the "comment"
# color and linking to the project home page.
html_footer = """
<font color="{}"><b>### colorized by <a href="{}">sedsed</a>, \
a debugger and code formatter for sed scripts</b></font>
</pre></body></html>\
""".format(
    html_colors["comment"], myhome
)
# sedsed expects multiline text (aic text, s/// replacement) to have this
# odd string instead of inner \n's in the string
linesep = "@#linesep#@"
# Data holders that will be set by command line options
# (all of them are filled by parse_command_line())
# fmt: off
action_modifiers = [] # --hide contents and others
sedscript = [] # join all scripts found here
script_file = "" # last sedscript filename for --htmlize
quiet_flag = 0 # tell if the #n is needed or not
textfiles = [] # input files for sed; [stdin_id] when none is informed
# fmt: on
# Color-related variables, will be set in set_colors()
color_YLW = ""
color_RED = ""
color_REV = ""
color_NO = ""
# Color-dependent variable, will be set after the command line parsing
newlineshow = ""
# Debug-related variables, will be set by set_debug_commands()
showpatt = ""
showhold = ""
save_t = ""
showcomm = ""
nullcomm = ""
# Regex to match the shebang, grouping the sed options
# (only the 'n' and 'f' option letters are captured by the group)
topopts_regex = r"#!\s*/[^\s]+\s+-([nf]+)"
# All sed commands grouped by kind
# Each value is a string of one-letter command ids, so membership is checked
# with the 'in' operator.
sedcmds = {
    "file": "rw" + "RW", # standard + GNU sed
    "multi": "sy",
    "solo": "=dDgGhHnNpPx" + "Fz", # standard + GNU sed
    "text": "aci" + "e", # standard + GNU sed
    "jump": ":bt" + "T", # standard + GNU sed
    "block": "{}",
    "int": "qQlL", # standard (q, l), GNU sed (q<n>, l<n>, Q, L)
    "misc": "v", # GNU sed
}
# All fields used by the sedsed AST dictionary
cmdfields = [
    "linenr",
    "addr1",
    "addr1flag",
    "addr2",
    "addr2flag",
    "lastaddr",
    "modifier",
    "id",
    "content",
    "delimiter",
    "pattern",
    "replace",
    "flag",
    "comment",
]
# -----------------------------------------------------------------------------
# Special adjustments
# -----------------------------------------------------------------------------
# The identifier recognized by sed as STDIN
# - BSD sed does not support '-'
# - Windows, Termux and others do not have /dev/stdin
# Prefer the /dev/stdin device when the platform has it; otherwise fall back
# to the conventional '-' marker.
stdin_id = "/dev/stdin" if os.path.exists("/dev/stdin") else "-"
# Turn color OFF on Windows because ANSI.SYS is not installed by default.
# Windows users who have ANSI.SYS configured, can use the --color option
# or comment the following line.
# ANSI.SYS resources:
# http://www.evergreen.edu/biophysics/technotes/program/ansi_esc.htm#notes
# http://www3.sympatico.ca/rhwatson/dos7/v-ansi-escseq.html
# This only changes the default: the command line is parsed later and the
# --color option sets it back to 1.
if os.name == "nt":
    color = 0
# -----------------------------------------------------------------------------
# General Functions
# -----------------------------------------------------------------------------
def print_usage(exitcode=1):
    """Print the help text and the project website to STDOUT, then exit.

    exitcode: status passed to sys.exit(). The default is 1; the --help
    option calls this function with 0.

    This function never returns: it always ends with sys.exit().
    """
    print(
        """
Usage: sedsed OPTION [-e sedscript] [-f sedscriptfile] [inputfile]
OPTIONS:
-f, --file add file contents to the commands to be parsed
-e, --expression add the script to the commands to be parsed
-n, --quiet suppress automatic printing of pattern space
--silent alias to --quiet
-d, --debug debug the sed script
--hide hide some debug info (options: PATT,HOLD,COMM)
--color shows debug output in colors (default: ON)
--nocolor no colors on debug output
--dump-debug dumps to screen the debugged sed script
-i, --indent script beautifier, prints indented and
one-command-per-line output to STDOUT
--prefix indent prefix string (default: 4 spaces)
--sedbin specify sed executable (name or full path)
-t, --tokenize script tokenizer, prints extensive
command by command information
-H, --htmlize converts sed script to a colorful HTML page
-V, --version prints the program version and exit
-h, --help prints this help message and exit
"""
    )
    print("Website: %s\n" % myhome)
    sys.exit(exitcode)
def fatal_error(msg):
    """Report `msg` on STDERR, tagged with the program name, and exit with 1.

    Every error message of the program goes through here.
    """
    sys.stderr.write("ERROR: %s: %s" % (myname, msg) + "\n")
    sys.exit(1)
def echo(msg):  # pylint: disable=unused-variable
    """Print `msg` to STDOUT between the ANSI bold-yellow and reset escapes."""
    highlighted = "\033[33;1m%s\033[m" % msg
    print(highlighted)
def devdebug(msg, level=1):  # pylint: disable=unused-variable
    """Print a developer debug message when the DEBUG level allows it.

    The message is shown only when the global DEBUG is set and is greater
    than or equal to `level`.
    """
    enabled = DEBUG and DEBUG >= level
    if not enabled:
        return
    print("+++ DEBUG%d: %s" % (level, msg))
def read_file(file_path):
    """Return the file contents as a list of lines, without line breaks.

    When `file_path` is the STDIN identifier (stdin_id or "-"), the lines are
    read from sys.stdin and a ^C during the read exits with status 1.
    A file that cannot be read aborts the program via fatal_error().
    """
    trailing_eol = re.compile("[\n\r]+$")

    if file_path in (stdin_id, "-"):
        try:
            raw_lines = sys.stdin.readlines()
        except KeyboardInterrupt:  # ^C
            # Ideally the exit code should be 128+signal.SIGINT in Unix, but
            # it is not clear what to use in other platforms, so keep it
            # simple.
            sys.exit(1)
    else:
        try:
            with open(file_path) as handle:
                raw_lines = handle.readlines()
        except IOError as err:
            fatal_error("Cannot read file: %s\n%s" % (file_path, err))

    return [trailing_eol.sub("", raw) for raw in raw_lines]
def write_file(file_path, lines):
    """Write every item of `lines` to `file_path`, each one ending in "\\n".

    An item that already ends with a line break does not get a second one.
    A file that cannot be written aborts the program via fatal_error().
    """
    line_break = re.compile("\n$")
    try:
        with open(file_path, "w") as handle:
            # TODO maybe use os.linesep? - all this is really necessary?
            # Normalize first, so every item ends in exactly one line break
            normalized = []
            for item in lines:
                normalized.append(line_break.sub("", item) + "\n")
            handle.writelines(normalized)
    except IOError as err:
        fatal_error("Cannot write file: %s\n%s" % (file_path, err))
def system_command(cmd):
    """Run `cmd` in a shell and return an (exit_code, output_lines) tuple.

    cmd: the full command line, handed as is to os.popen().

    Returns:
        exit_code: None when the command succeeded, otherwise its exit code
            as an integer.
        output_lines: list with the command STDOUT lines, with the trailing
            whitespace (line break included) removed.
    """
    pipe = os.popen(cmd)
    output = [line.rstrip() for line in pipe]  # stripping \s*\n

    # close() returns None on success. On POSIX, a failure is reported as the
    # exit code shifted left by one byte (16bit number); on Windows it is the
    # exit code itself. Use integer division to avoid a float exit code.
    ret = pipe.close()
    if ret and os.name != "nt":
        ret = ret // 256

    return ret, output
def validate_script_syntax(script_text):
    """Validate a sed script using system's sed.

    script_text: the script contents, handed as is to write_file().

    The script is saved into a temporary file and executed by the `sedbin`
    program against an empty temporary file. When sed exits with an error,
    the program is aborted via fatal_error(). Both temporary files are always
    removed, even when something goes wrong in the middle of the process.
    """
    tmpfiles = []
    try:
        # mkstemp() creates the files atomically, avoiding the race condition
        # of the deprecated mktemp(). Only the paths are needed here, so the
        # returned descriptors are closed right away.
        # Using a second file because "sed -f script /dev/null" won't work in
        # Windows.
        for _ in range(2):
            handle, path = tempfile.mkstemp()
            os.close(handle)
            tmpfiles.append(path)
        script_path, empty_path = tmpfiles

        write_file(script_path, script_text)
        write_file(empty_path, "")

        # Note that even when running against an empty file, there could be
        # consequences on the system, such as a 'w' command writing files.
        # sed -f sed_script empty_file
        ret, _ = system_command("%s -f '%s' '%s'" % (sedbin, script_path, empty_path))
    finally:
        for path in tmpfiles:
            os.remove(path)

    # The sed command will fail when there's something wrong:
    # - syntax error
    # - unknown command
    # - permission denied for file read/write commands (r, w, s///w)
    #   Example: touch a; chmod 000 a; sedsed -d -e 'w a'
    if ret:
        # At this point, the sed error message was already shown to the user,
        # explaining the reason for the failure. So now we abort giving some
        # context of what we were trying to do.
        fatal_error(
            "%d: Failed validating your script using system's sed: %s" % (ret, sedbin)
        )
def set_colors():
    """Set the global color_* variables according to the `color` setting.

    With colors on they hold the terminal escapes for yellow text, red text,
    reverse video and back to default. With colors off they are all empty.
    """
    # pylint: disable=global-statement
    global color_YLW
    global color_RED
    global color_REV
    global color_NO

    if not color:
        color_YLW, color_RED, color_REV, color_NO = "", "", "", ""
        return

    color_YLW, color_RED, color_REV, color_NO = (
        "\033[33;1m",
        "\033[31;1m",
        "\033[7m",
        "\033[m",
    )
# -----------------------------------------------------------------------------
# Command line
# -----------------------------------------------------------------------------
def parse_command_line(arguments=None):
    """Parse the command line and save the results in the module globals.

    arguments: list of command line arguments. When empty or None,
    sys.argv[1:] is used instead.

    Nothing is returned: the options found update the global configuration
    (action, color, sedbin, sedscript, textfiles and others). Invalid options,
    an invalid --prefix or a missing sed script abort the program via
    fatal_error(). The --help and --version options exit right away.
    """
    # pylint: disable=global-statement, global-variable-not-assigned
    global action
    global action_modifiers
    global color
    global DEBUG
    global dump_debug
    global indent_prefix
    global newlineshow
    global quiet_flag
    global script_file
    global sedscript
    global sedbin
    global textfiles
    arguments = arguments or sys.argv[1:]
    # Here's all the valid command line options
    short_options = "he:f:ditVHn"
    long_options = [
        # actions
        "debug",
        "tokenize",
        "htmlize",
        "indent",
        # sed-like
        "version",
        "help",
        "file=",
        "expression=",
        "silent",
        "quiet",
        # misc
        "nocolor",
        "color",
        "hide=",
        "prefix=",
        "sedbin=",
        # other
        "dump-debug",
        # admin
        "_debuglevel=",
        "_stdout-only",
        "dumpcute",
    ]
    # Check it!
    try:
        opt, args = getopt.getopt(arguments, short_options, long_options)
    except getopt.error as errmsg:
        fatal_error("%s (try --help)" % errmsg)
    # Command Line is OK, now let's parse its values
    # Each item is an (option, value) pair. The options are handled in the
    # order they were informed, so a later option can override an earlier one.
    for o in opt:
        if o[0] in ("-d", "--debug"):
            action = "debug"
        elif o[0] in ("-i", "--indent"):
            action = "indent"
            color = 0
        elif o[0] in ("-t", "--tokenize"):
            action = "token"
            color = 0
        elif o[0] in ("-H", "--htmlize"):
            action = "html"
            color = 0
        elif o[0] in ("-n", "--quiet", "--silent"):
            quiet_flag = 1
        elif o[0] in ("-e", "--expression"):
            # One sedscript item per script line
            sedscript.extend(o[1].split("\n"))
        elif o[0] in ("-f", "--file"):
            sedscript.extend(read_file(o[1]))
            script_file = o[1]
        elif o[0] in ("-h", "--help"):
            print_usage(0)
        elif o[0] in ("-V", "--version"):
            print("%s v%s" % (myname, __version__))
            sys.exit(0)
        elif o[0] == "--dump-debug":
            action = "debug"
            dump_debug = 1
            color = 0
        elif o[0] == "--nocolor":
            color = 0
        elif o[0] == "--color":
            color = 1
        elif o[0] == "--hide":
            # --hide=comm,hold ==> action_modifiers = ['nocomm', 'nohold']
            for hide in o[1].split(","):
                hide_me = hide.strip().lower()
                action_modifiers.append("no" + hide_me)
        elif o[0] == "--prefix":
            # Is the prefix valid?
            # (anything left after removing all the whitespace is an error)
            if re.sub(r"\s", "", o[1]):
                fatal_error("--prefix: must be spaces and/or TABs")
            indent_prefix = o[1]
        elif o[0] == "--sedbin":
            sedbin = o[1]
        # Undocumented admin options
        elif o[0] == "--_debuglevel":
            DEBUG = int(o[1])
        elif o[0] == "--_stdout-only":
            action = "debug"
            # Saved without the leading dashes: "_stdout-only"
            action_modifiers.append(o[0][2:])
        elif o[0] == "--dumpcute":
            action = "dumpcute"
            DEBUG = 0
            color = 1
    # There's a SED script?
    if not sedscript:
        if args:
            # the script is the only argument (echo | sed 's///')
            sedscript.append(args.pop(0))
        else:
            fatal_error("there's no SED script to parse! (try --help)")
    # Get all text files, if none, use STDIN
    textfiles = args or [stdin_id]
    # All command line arguments were read and parsed. Now we need to do some
    # adjustments in the data, based on the current config state.
    # Add the leading #n to the sed script, when using -n
    if quiet_flag:
        sedscript.insert(0, "#n")
    # At this point we know if colors are configured or not
    set_colors()
    # When showing the inner \n's to the user use this red \N
    newlineshow = "%s\\N%s" % (color_RED, color_NO)
# The SED debugger magic lines
# ----------------------------
#
# Here is where the 'magic' lives. The heart of this program are the
# following lines, which are the special SED commands responsible for
# the DEBUG behaviour. For *each* command of the original script,
# several commands are added before, to show buffers and command
# contents. Some tricks are needed to preserve script's original
# behaviour, they are explained ahead.
#
# 1. Show PATTERN SPACE contents:
# The 'PATT:' prefix is added, then the 'l' command shows the
# buffer contents, then the prefix is removed.
#
# 2. Show HOLD SPACE contents:
# Similar to PATTERN SPACE, but use the 'x' command to access and
# restore the HOLD buffer contents. The prefix used is 'HOLD:'.
#
# 3. Show current SED COMMAND:
# Uses a single 'i' command to show the full 'COMM:' line, as it
# does not depend on execution data. The color codes are added or
# not, depending on user options.
#
# 4. 'Last Address' trick:
# On SED, the empty address // refers to the last address matched.
# As this behaviour can be affected when several DEBUG lines are
# inserted before the command, sedsed uses a trick to force it.
# The last address used on the original script is repeated with a
# null command (/last-address/ y/!/!/). This way sedsed repeats the
# addressing, ensuring the next command will have it as the right
# 'last' address.
#
# 5. 't Status' trick:
# The 't' command behaviour, from SED manual page:
#
# If a s/// has done a successful substitution since the last
# input line was read and since the last t command, then branch
# to label
#
# As all the DEBUG commands use lots of 's///' commands, the 't'
# status is always true. The trick here is to add fake labels
# between *any* command and fake 't' commands to jump to them:
#
# <last command, possibly s///>
# t zzset001
# ... debug commands ...
# t zzclr001
# : zzset001
# ... debug commands ...
# : zzclr001
# <next command, possibly t>
#
# The DEBUG commands are repeated and placed into two distinct
# blocks: 'zzset' and 'zzclr', which represents the 't' status
# of the last command. The execution order follows:
#
# zzset: 1st jump (t), then debug (s///), t status is ON
# zzclr: 1st debug (s///), then jump (t), t status is OFF
#
# The 001 count is incremented on each command to have unique
# labels.
#
# For the GNU sed 'T' command, the behaviour is the opposite: it only
# branches when there was *no* successful substitution. Luckily, the
# trick used for 't' applies to 'T' with no changes, because we can
# save and restore the correct last 's///' status.
#
#
# --- THANK YOU VERY MUCH ---
#
# - Paolo Bonzini (GNU sed 4.x maintainer) for the idea of the
# 't status' trick.
#
# - Thobias Salazar Trevisan for the idea of using the 'i'
# command for the COMM: lines.
#
def set_debug_commands():
    """Compose the sed commands used by the debugger and save them as globals.

    Sets showpatt (show pattern space), showhold (show hold space), showcomm
    (show the current sed command), nullcomm (null command to restore the last
    address) and save_t (the 't status' trick). In the composed texts the "\\a"
    character and the "#DEBUG#" string are placeholders to be replaced later.
    The registers hidden by the user with --hide are set to the empty string.
    """
    # pylint: disable=global-statement
    global showpatt
    global showhold
    global save_t
    global showcomm
    global nullcomm

    def one_per_line(commands):
        "One per line, with prefix (spaces)"
        return "".join(debug_prefix + item + "\n" for item in commands)

    hidden = action_modifiers

    # show pattern space
    if "nopatt" in hidden:
        showpatt = ""
    else:
        showpatt = one_per_line(["s/^/PATT:/", "l", "s/^PATT://"])

    # show hold space
    if "nohold" in hidden:
        showhold = ""
    else:
        showhold = one_per_line(["x", "s/^/HOLD:/", "l", "s/^HOLD://", "x"])

    # show sed command: only the first line gets the prefix
    if "nocomm" in hidden:
        showcomm = ""
    else:
        comm_text = "COMM:%s\a%s" % (color_YLW, color_NO)
        showcomm = debug_prefix + "i\\" + "\n" + comm_text + "\n"

    # 't' and 'T' status trick
    save_t = one_per_line(
        ["t zzset\a\n#DEBUG#", "t zzclr\a", ":zzset\a\n#DEBUG#", ":zzclr\a"]
    )

    # null sed command to restore last address
    nullcomm = "y/!/!/"
# -----------------------------------------------------------------------------
# Auxiliary Functions - Tools
# -----------------------------------------------------------------------------
def escape_text_commands_specials(text):
    """Return `text` with every backslash doubled (escape the escape)."""
    backslash = "\\"
    return (backslash * 2).join(text.split(backslash))
def paint_html(element, txt=""):
    """Return `txt` as HTML code, colored according to the `element` kind.

    element: name of the sed command element being painted. It selects the
        color in html_colors and, for some elements, extra highlights/links.
    txt: the text to paint. When empty (or None) it is returned untouched.

    The HTML special characters found in `txt` are escaped. The result is
    wrapped in a bold <font> tag only when there is a color set for the
    element in html_colors.
    """
    if not txt:
        return txt  # nothing to paint

    # Escape HTML special chars
    # The & must be the first one, otherwise the & from the entities added by
    # the next replacements would be escaped again.
    txt = txt.replace("&", "&amp;")
    txt = txt.replace(">", "&gt;")
    txt = txt.replace("<", "&lt;")

    # Some color adjustments and emphasis
    if element == "id" and txt in sedcmds["block"]:
        element = "delimiter"
    elif element == "id" and txt == ":":
        element = "content"
    elif element == "replace":
        # highlight \n, & and \$
        newtxt = paint_html("special", "\\" + linesep)
        txt = txt.replace("\\" + linesep, newtxt)
        # At this point the & was already escaped, so search for its entity
        txt = re.sub("(\\\\[1-9]|&amp;)", paint_html("special", "\\1"), txt)
    elif element == "pattern":
        # highlight ( and |
        txt = re.sub("(\\\\)([(|])", "\\1" + paint_html("pattmeta", "\\2"), txt)
    elif element == "plaintext":
        # highlight \$
        newtxt = paint_html("special", "\\" + linesep)
        txt = txt.replace("\\" + linesep, newtxt)
    elif element == "branch":
        # nice link to the label
        txt = '<a href="#%s">%s</a>' % (txt, txt)
    elif element == "target":
        # link target
        txt = '<a name="%s">%s</a>' % (txt, txt)
        element = "content"

    # Paint it!
    if html_colors.get(element) and txt:
        font_color = html_colors[element]
        txt = '<font color="%s"><b>%s</b></font>' % (font_color, txt)
    return txt
# -----------------------------------------------------------------------------
# Hardcore Address/Command Composer Functions
# -----------------------------------------------------------------------------
def compose_sed_address(data):
    """Return the full address of a sed command, as plain text or HTML.

    data: the dictionary with the command data. The result is the empty
    string when there is no address. Otherwise it is "addr1" or
    "addr1,addr2" (flags included), followed by one space that separates the
    address from the command.
    """
    if not data["addr1"]:
        return ""  # no address

    if action == "html":
        first = data["addr1html"]
        first_flag = paint_html("addr1flag", data.get("addr1flag"))
        second = data.get("addr2html")
        second_flag = paint_html("addr2flag", data.get("addr2flag"))
    else:
        first = data.get("addr1")
        first_flag = data.get("addr1flag")
        second = data.get("addr2")
        second_flag = data.get("addr2flag")

    parts = ["%s%s" % (first, first_flag)]
    if data["addr2"]:
        parts.append("%s%s" % (second, second_flag))

    # address, space, (command)
    return ",".join(parts) + " "
def compose_sed_command(data):
    """Return the sed command (without the address) as plain text or HTML.

    data: the dictionary with the command data. In the result, the internal
    line separator mark (linesep) is replaced by a real line break.
    """
    html_mode = action == "html"

    if data["delimiter"]:  # s///
        elements = [
            ("modifier", data["modifier"]),
            ("id", data["id"]),
            ("delimiter", data["delimiter"]),
            ("pattern", data["pattern"]),
            ("delimiter", data["delimiter"]),
            ("replace", data["replace"]),
            ("delimiter", data["delimiter"]),
            ("flag", data["flag"]),
        ]
        if html_mode:
            cmd = "".join("%s" % (paint_html(kind, value),) for kind, value in elements)
            if data["content"]:  # s///w filename
                cmd = "%s %s" % (cmd, paint_html("content", data["content"]))
        else:
            cmd = "".join("%s" % (value,) for _, value in elements)
            if data["content"]:  # s///w filename
                cmd = cmd + " " + data["content"]
    else:
        # spacer on r,w,b,t,v commands only
        spaced_ids = sedcmds["file"] + sedcmds["jump"] + sedcmds["int"] + "v"
        spaced_ids = spaced_ids.replace(":", "")  # : label (no space!)
        idsep = " " if (data["id"] in spaced_ids and data["content"]) else ""

        if html_mode:
            # The kind of the content defines how it will be painted
            if data["id"] in sedcmds["text"]:
                content_type = "plaintext"
            elif data["id"] in ("b", "t", "T"):
                content_type = "branch"
            elif data["id"] == ":":
                content_type = "target"
            else:
                content_type = "content"
            cmd = "%s%s%s%s" % (
                paint_html("modifier", data["modifier"]),
                paint_html("id", data["id"]),
                idsep,
                paint_html(content_type, data["content"]),
            )
        else:
            cmd = "%s%s%s%s" % (data["modifier"], data["id"], idsep, data["content"])

    return cmd.replace(linesep, "\n")
# -----------------------------------------------------------------------------
# The dump* Functions - They 4mat 4you!
# -----------------------------------------------------------------------------
def dump_key_value_pair(datalist):
    """Return the field:value data of every command, one pair per line.

    datalist: list whose first item is the header dictionary (its "fields"
    key lists the fields to show) and the next ones are the command
    dictionaries. Blank lines (commands with no id) are skipped and an empty
    line is added after each command. Note that the "replace" field of the
    commands is changed in place, to show the inner line breaks.
    """
    result = []
    for entry in datalist[1:]:  # skip headers at 0
        if entry["id"]:
            for field in datalist[0]["fields"]:
                if field == "replace":
                    entry[field] = entry[field].replace(linesep, newlineshow)
                result.append("%10s:%s" % (field, entry[field]))
            result.append("")
    return result
# Format: line:ad1:ad1f:ad2:ad2f:mod:cmd:content:delim:patt:rplc:flag:comment
def dump_oneliner(datalist, fancy=0):  # pylint: disable=unused-variable
    """Return one item per command, with its fields separated by colons.

    datalist: list whose first item is the header dictionary (its "fields"
    key lists the fields to show, the first one being the line number) and
    the next ones are the command dictionaries.
    fancy: when true, every field value is shown in reverse video.

    For the blank lines (commands with no id) the item is the untouched
    "linenr" value of the command.
    """
    start, stop = ("\033[7m", "\033[m") if fancy else ("", "")
    result = []
    for entry in datalist[1:]:  # skip headers at 0
        row = entry["linenr"]
        if entry["id"]:
            cells = [
                "%s%s%s" % (start, entry[field], stop)
                for field in datalist[0]["fields"][1:]  # skip linenr
            ]
            if cells:
                row = "%s:%s" % (row, ":".join(cells))
        result.append(row)
    return result
def dump_cute(datalist):
    """Return a strange representation of SED commands. Use --dumpcute.

    datalist: list whose first item is the header dictionary (skipped) and
    the next ones are the command dictionaries. Blank lines are shown as a
    right-aligned "[blank]", comments are shown as is, and every other
    command takes three lines: a ruler, the addresses and the command itself.
    """
    rev, off = color_REV, color_NO
    result = []
    for entry in datalist[1:]:  # skip headers at 0
        cmd_id = entry["id"]

        if not cmd_id:
            result.append("%40s" % "[blank]")
            continue
        if cmd_id == "#":
            result.append(entry["comment"])
            continue

        idsep = " " if cmd_id in ("b", "t", "T") else ""
        cmd = "%s%s%s%s" % (entry["modifier"], cmd_id, idsep, entry["content"])
        if entry["delimiter"]:
            delim = entry["delimiter"]
            cmd = "%s%s%s%s%s%s%s" % (
                cmd,
                delim,
                entry["pattern"],
                delim,
                entry["replace"],
                delim,
                entry["flag"],
            )
        # Leave the reverse video just to show the line break mark
        cmd = cmd.replace(linesep, off + newlineshow + rev)

        result.append("-" * 40)
        result.append(
            "adr: %s%s%s%s ::: %s%s%s%s"
            % (
                rev,
                entry["addr1"],
                entry["addr1flag"],
                off,
                rev,
                entry["addr2"],
                entry["addr2flag"],
                off,
            )
        )
        result.append("cmd: %s%s%s [%s]" % (rev, cmd, off, entry["comment"]))
    return result
# dump_script: This is a handy function, used by --indent AND --htmlize
# It formats the SED script in a human-friendly way, with one command
# per line and adding spaces on the right places. If --htmlize, it
# also adds the HTML code to the script.
#
def dump_script(datalist, indent_prefix):
    """Return the indented script, as a list of plain text or HTML lines.

    datalist: list whose first item is the header dictionary (skipped) and
    the next ones are the command dictionaries.
    indent_prefix: the string repeated once per block level to indent.

    In the HTML mode, the page header and footer are also added.
    """
    html_mode = action == "html"
    result = []
    level = 0  # current block nesting

    if html_mode:
        result.append(html_header % os.path.basename(script_file))

    for entry in datalist[1:]:  # skip headers at 0
        cmd_id = entry["id"]

        if not cmd_id:  # blank line
            result.append("")
        elif cmd_id == "#":
            margin = indent_prefix * level
            if html_mode:
                result.append(margin + paint_html("comment", entry["comment"]))
            else:
                result.append(margin + entry["comment"])
        else:
            # The closing } goes back one level before being shown, and the
            # opening { only affects the commands that come after it
            if cmd_id == "}":
                level -= 1
            margin = indent_prefix * level
            if cmd_id == "{":
                level += 1

            command = compose_sed_command(entry)
            address = compose_sed_address(entry)
            full_line = "%s%s%s" % (margin, address, command)

            if entry["comment"]:
                # Inline comments are aligned at column 40
                # The leading ; before # is required by non-GNU seds
                full_line = "%-39s;%s" % (full_line, entry["comment"])
            result.append(full_line)

    if html_mode:
        result.append(html_footer)
    return result
# -----------------------------------------------------------------------------
# do_debug - Here is where the fun begins
# -----------------------------------------------------------------------------
#
# This function performs the --debug action.
#
# After the SED script was parsed by the parser (below), this function
# is called with the script data found. It loops, shouts and screams,
# inserting the nice DEBUG lines between the SED script commands.
#
# After all lines are composed, it calls the system's SED to run the
# script, and SED will do its job, but this time showing you all the
# secrets that the PATTERN SPACE and HOLD SPACE buffers hold.
#
def do_debug(datalist):
    """Compose the debug version of the sed script and run (or dump) it.

    datalist: list whose first item is the header dictionary (the "has_t" and
    the optional "topopts" keys are read here) and the next ones are the
    command dictionaries.

    Before every command of the script, the debug commands that show the
    registers and the command itself are inserted. When dump_debug is on, the
    resulting script is printed to STDOUT. Otherwise it is saved into a
    temporary file and executed by the `sedbin` program over the input files.
    """
    outlist = []
    cmdlineopts = "f"
    t_count = 0
    hideregisters = 0
    set_debug_commands()
    # Options found in the script itself replace the default -f
    if "topopts" in datalist[0]:
        cmdlineopts = datalist[0]["topopts"]
    # If we have at least one 't' or 'T' command on the script, we need
    # to save the t command status between debug commands. As they perform
    # s/// commands, the t status of the "last substitution" is lost.
    # So, we save the status doing a nice loop trick before *every*
    # command (necessary overhead). This loop uses the :zzsetNNN and
    # zzclrNNN labels, where NNN is the label count.
    # TIP: t status resets: line read, t call
    if datalist[0]["has_t"]:
        t_count = 1
    for i, data in enumerate(datalist):
        if i == 0:
            continue  # skip headers at 0
        if not data["id"]:
            continue  # ignore blank line
        if data["id"] == "#":
            outlist.append("%s\n" % (data["comment"]))
        else:
            cmd = compose_sed_command(data)
            addr = compose_sed_address(data)
            # The text shown in the COMM: line: inner line breaks are made
            # visible and the backslashes are escaped
            cmdshow = cmd.replace("\n", newlineshow + color_YLW)
            cmdshow = escape_text_commands_specials(addr + cmdshow)
            showsedcmd = showcomm.replace("\a", cmdshow)
            registers = showpatt + showhold
            if hideregisters:
                registers = ""
            showall = "%s%s" % (registers, showsedcmd)
            # Add the 't status' trick to commands.
            # Exception: read-next-line commands (n,d,q)
            # Exception: no PATT/HOLD registers to show (no s///)
            if t_count and showall:
                if data["id"] not in ("n", "d", "q") and registers:
                    # \a is the placeholder for the unique label count
                    tmp = save_t.replace("\a", "%03d" % t_count)
                    showall = tmp.replace("#DEBUG#", showall)
                    t_count = t_count + 1
            # null cmd to restore last addr: /addr/y/!/!/
            # Bug: https://github.com/aureliojargas/sedsed/issues/15
            if data["lastaddr"]:
                showall = showall + debug_prefix + data["lastaddr"] + nullcomm + "\n"
            # after jump, block or void commands don't show
            # registers, because they're not affected.
            # exception: after b or t without target
            # (read next line)
            # Note that this flag is used by the *next* command of the loop
            hideregisters = 0
            if data["id"] in sedcmds["jump"] and data["content"]:
                hideregisters = 1
            elif data["id"] in sedcmds["block"]:
                hideregisters = 1
            elif data["id"] == "v":
                hideregisters = 1
            # debug commands, separator line, original command
            outlist.append("%s#%s\n%s\n" % (showall, "-" * 50, addr + cmd))
    outlist.append(showpatt + showhold)  # last line status
    # executing sed script
    cmdextra = ""
    if "_stdout-only" in action_modifiers:
        # cmdextra = "| egrep -v '^PATT|^HOLD|^COMM|\$$|\\$'" # sed
        cmdextra = "-l 9999 | egrep -v '^PATT|^HOLD|^COMM'"  # gsed
    # NOTE(review): the file names are joined with no quoting, so a name with
    # spaces or shell special characters will not reach sed as a single
    # argument in the os.system() call below.
    inputfiles = " ".join(textfiles)
    if dump_debug:
        for line in [re.sub("\n$", "", x) for x in outlist]:
            print(line)
        print(
            "\n# Debugged SED script generated by %s-%s (%s)"
            % (myname, __version__, myhome)
        )
    else:
        # NOTE(review): tempfile.mktemp() is deprecated because of its race
        # condition, and the file is not removed if the lines below raise.
        tmpfile = tempfile.mktemp()
        write_file(tmpfile, outlist)
        os.system(
            "%s -%s %s %s %s" % (sedbin, cmdlineopts, tmpfile, inputfiles, cmdextra)
        )
        os.remove(tmpfile)
###############################################################################
# #
# SED Script Parser #
# ------------------------- #
# Extract Every Info of Every Command #
# #
###############################################################################
#
# Here we used to have a custom brute force buggy parser.
# Now we are using sedparse, a direct translation of the GNU sed C code.
#
# To avoid having to adapt the whole sedsed code to the sedparse AST, the
# following `parse()` function will convert the sedparse AST into the same AST
# used by the old parser: a list having a flat dictionary for each command.
#
# TODO properly document sedsed and sedparse AST's