add script odfdo-replace

jdum · Dec 25, 2023 · e6d6149 · e6d6149
1 parent f10de01
commit e6d6149
Show file tree

Hide file tree

Showing 8 changed files with 588 additions and 49 deletions.
diff --git a/README.md b/README.md
@@ -68,12 +68,13 @@ There is no detailed documentation or tutorial, but:
  - the `recipes` folder contains more than 50 working sample scripts,
  - the `doc` folder contains an auto generated documentation.
 
-When installing odfdo, 4 scripts are installed:
+When installing odfdo, 5 scripts are installed:
 
  - `odfdo-diff`: show a *diff* between two .odt documents.
  - `odfdo-folder`: convert standard ODF file to folder and files, and reverse.
  - `odfdo-show`: dump text from an ODF file to the standard output, and optionally styles and meta information.
  - `odfdo-styles`: command line interface tool to manipulate styles of ODF files.
+ - `odfdo-replace`: find a pattern (regex) in an ODF file and replace it with some string.
 
 About styles: the best way to apply style is by merging styles from a template
 document into your generated document (See `odfdo-styles` script).

diff --git a/doc/container.html b/doc/container.html
@@ -76,6 +76,12 @@ <h1 class="title">Module <code>odfdo.container</code></h1>
 from .utils import to_bytes, to_str
 
 
+def normalize_path(path: str) -&gt; str:
+    if path.endswith(&#34;/&#34;):  # folder
+        return PurePath(path[:-1]).as_posix() + &#34;/&#34;
+    return PurePath(path).as_posix()
+
+
 class Container:
     &#34;&#34;&#34;Representation of the ODF file.&#34;&#34;&#34;
 
@@ -139,16 +145,20 @@ <h1 class="title">Module <code>odfdo.container</code></h1>
         return clone
 
     def _read_zip(self) -&gt; None:
+        if isinstance(self.__path_like, io.BytesIO):
+            self.__path_like.seek(0)
         with ZipFile(self.__path_like) as zf:
             mimetype = zf.read(&#34;mimetype&#34;).decode(&#34;utf8&#34;, &#34;ignore&#34;)
             if mimetype not in ODF_MIMETYPES:
                 raise ValueError(f&#34;Document of unknown type {mimetype}&#34;)
             self.__parts[&#34;mimetype&#34;] = to_bytes(mimetype)
         if self.path is None:
+            if isinstance(self.__path_like, io.BytesIO):
+                self.__path_like.seek(0)
             # read the full file at once and forget file
             with ZipFile(self.__path_like) as zf:
                 for name in zf.namelist():
-                    upath = PurePath(name).as_posix()
+                    upath = normalize_path(name)
                     self.__parts[upath] = zf.read(name)
             self.__path_like = None
 
@@ -208,10 +218,7 @@ <h1 class="title">Module <code>odfdo.container</code></h1>
         &#34;&#34;&#34;Get bytes of a part from the Zip ODF file. No cache.&#34;&#34;&#34;
         try:
             with ZipFile(self.path) as zf:
-                if name.endswith(&#34;/&#34;):  # folder
-                    upath = PurePath(name[:-1]).as_posix() + &#34;/&#34;
-                else:
-                    upath = PurePath(name).as_posix()
+                upath = normalize_path(name)
                 self.__parts[upath] = zf.read(name)
                 return self.__parts[upath]
         except BadZipfile:
@@ -222,10 +229,7 @@ <h1 class="title">Module <code>odfdo.container</code></h1>
         try:
             with ZipFile(self.path) as zf:
                 for name in zf.namelist():
-                    if name.endswith(&#34;/&#34;):  # folder
-                        upath = PurePath(name[:-1]).as_posix() + &#34;/&#34;
-                    else:
-                        upath = PurePath(name).as_posix()
+                    upath = normalize_path(name)
                     self.__parts[upath] = zf.read(name)
         except BadZipfile:
             pass
@@ -300,10 +304,8 @@ <h1 class="title">Module <code>odfdo.container</code></h1>
             parts = []
             with ZipFile(self.path) as zf:
                 for name in zf.namelist():
-                    if name.endswith(&#34;/&#34;):
-                        parts.append(PurePath(name[:-1]).as_posix() + &#34;/&#34;)
-                    else:
-                        parts.append(PurePath(name).as_posix())
+                    upath = normalize_path(name)
+                    parts.append(upath)
             return parts
         elif self.__packaging == &#34;folder&#34;:
             return self._get_folder_parts()
@@ -467,6 +469,24 @@ <h1 class="title">Module <code>odfdo.container</code></h1>
 <section>
 </section>
 <section>
+<h2 class="section-title" id="header-functions">Functions</h2>
+<dl>
+<dt id="odfdo.container.normalize_path"><code class="name flex">
+<span>def <span class="ident">normalize_path</span></span>(<span>path: str) ‑> str</span>
+</code></dt>
+<dd>
+<div class="desc"></div>
+<details class="source">
+<summary>
+<span>Expand source code</span>
+</summary>
+<pre><code class="python">def normalize_path(path: str) -&gt; str:
+    if path.endswith(&#34;/&#34;):  # folder
+        return PurePath(path[:-1]).as_posix() + &#34;/&#34;
+    return PurePath(path).as_posix()</code></pre>
+</details>
+</dd>
+</dl>
 </section>
 <section>
 <h2 class="section-title" id="header-classes">Classes</h2>
@@ -544,16 +564,20 @@ <h2 class="section-title" id="header-classes">Classes</h2>
         return clone
 
     def _read_zip(self) -&gt; None:
+        if isinstance(self.__path_like, io.BytesIO):
+            self.__path_like.seek(0)
         with ZipFile(self.__path_like) as zf:
             mimetype = zf.read(&#34;mimetype&#34;).decode(&#34;utf8&#34;, &#34;ignore&#34;)
             if mimetype not in ODF_MIMETYPES:
                 raise ValueError(f&#34;Document of unknown type {mimetype}&#34;)
             self.__parts[&#34;mimetype&#34;] = to_bytes(mimetype)
         if self.path is None:
+            if isinstance(self.__path_like, io.BytesIO):
+                self.__path_like.seek(0)
             # read the full file at once and forget file
             with ZipFile(self.__path_like) as zf:
                 for name in zf.namelist():
-                    upath = PurePath(name).as_posix()
+                    upath = normalize_path(name)
                     self.__parts[upath] = zf.read(name)
             self.__path_like = None
 
@@ -613,10 +637,7 @@ <h2 class="section-title" id="header-classes">Classes</h2>
         &#34;&#34;&#34;Get bytes of a part from the Zip ODF file. No cache.&#34;&#34;&#34;
         try:
             with ZipFile(self.path) as zf:
-                if name.endswith(&#34;/&#34;):  # folder
-                    upath = PurePath(name[:-1]).as_posix() + &#34;/&#34;
-                else:
-                    upath = PurePath(name).as_posix()
+                upath = normalize_path(name)
                 self.__parts[upath] = zf.read(name)
                 return self.__parts[upath]
         except BadZipfile:
@@ -627,10 +648,7 @@ <h2 class="section-title" id="header-classes">Classes</h2>
         try:
             with ZipFile(self.path) as zf:
                 for name in zf.namelist():
-                    if name.endswith(&#34;/&#34;):  # folder
-                        upath = PurePath(name[:-1]).as_posix() + &#34;/&#34;
-                    else:
-                        upath = PurePath(name).as_posix()
+                    upath = normalize_path(name)
                     self.__parts[upath] = zf.read(name)
         except BadZipfile:
             pass
@@ -705,10 +723,8 @@ <h2 class="section-title" id="header-classes">Classes</h2>
             parts = []
             with ZipFile(self.path) as zf:
                 for name in zf.namelist():
-                    if name.endswith(&#34;/&#34;):
-                        parts.append(PurePath(name[:-1]).as_posix() + &#34;/&#34;)
-                    else:
-                        parts.append(PurePath(name).as_posix())
+                    upath = normalize_path(name)
+                    parts.append(upath)
             return parts
         elif self.__packaging == &#34;folder&#34;:
             return self._get_folder_parts()
@@ -1008,10 +1024,8 @@ <h3>Methods</h3>
         parts = []
         with ZipFile(self.path) as zf:
             for name in zf.namelist():
-                if name.endswith(&#34;/&#34;):
-                    parts.append(PurePath(name[:-1]).as_posix() + &#34;/&#34;)
-                else:
-                    parts.append(PurePath(name).as_posix())
+                upath = normalize_path(name)
+                parts.append(upath)
         return parts
     elif self.__packaging == &#34;folder&#34;:
         return self._get_folder_parts()
@@ -1134,6 +1148,11 @@ <h1>Index</h1>
 <li><code><a title="odfdo" href="index.html">odfdo</a></code></li>
 </ul>
 </li>
+<li><h3><a href="#header-functions">Functions</a></h3>
+<ul class="">
+<li><code><a title="odfdo.container.normalize_path" href="#odfdo.container.normalize_path">normalize_path</a></code></li>
+</ul>
+</li>
 <li><h3><a href="#header-classes">Classes</a></h3>
 <ul>
 <li>

diff --git a/doc/index.html b/doc/index.html
@@ -4993,16 +4993,20 @@ <h3>Inherited members</h3>
         return clone
 
     def _read_zip(self) -&gt; None:
+        if isinstance(self.__path_like, io.BytesIO):
+            self.__path_like.seek(0)
         with ZipFile(self.__path_like) as zf:
             mimetype = zf.read(&#34;mimetype&#34;).decode(&#34;utf8&#34;, &#34;ignore&#34;)
             if mimetype not in ODF_MIMETYPES:
                 raise ValueError(f&#34;Document of unknown type {mimetype}&#34;)
             self.__parts[&#34;mimetype&#34;] = to_bytes(mimetype)
         if self.path is None:
+            if isinstance(self.__path_like, io.BytesIO):
+                self.__path_like.seek(0)
             # read the full file at once and forget file
             with ZipFile(self.__path_like) as zf:
                 for name in zf.namelist():
-                    upath = PurePath(name).as_posix()
+                    upath = normalize_path(name)
                     self.__parts[upath] = zf.read(name)
             self.__path_like = None
 
@@ -5062,10 +5066,7 @@ <h3>Inherited members</h3>
         &#34;&#34;&#34;Get bytes of a part from the Zip ODF file. No cache.&#34;&#34;&#34;
         try:
             with ZipFile(self.path) as zf:
-                if name.endswith(&#34;/&#34;):  # folder
-                    upath = PurePath(name[:-1]).as_posix() + &#34;/&#34;
-                else:
-                    upath = PurePath(name).as_posix()
+                upath = normalize_path(name)
                 self.__parts[upath] = zf.read(name)
                 return self.__parts[upath]
         except BadZipfile:
@@ -5076,10 +5077,7 @@ <h3>Inherited members</h3>
         try:
             with ZipFile(self.path) as zf:
                 for name in zf.namelist():
-                    if name.endswith(&#34;/&#34;):  # folder
-                        upath = PurePath(name[:-1]).as_posix() + &#34;/&#34;
-                    else:
-                        upath = PurePath(name).as_posix()
+                    upath = normalize_path(name)
                     self.__parts[upath] = zf.read(name)
         except BadZipfile:
             pass
@@ -5154,10 +5152,8 @@ <h3>Inherited members</h3>
             parts = []
             with ZipFile(self.path) as zf:
                 for name in zf.namelist():
-                    if name.endswith(&#34;/&#34;):
-                        parts.append(PurePath(name[:-1]).as_posix() + &#34;/&#34;)
-                    else:
-                        parts.append(PurePath(name).as_posix())
+                    upath = normalize_path(name)
+                    parts.append(upath)
             return parts
         elif self.__packaging == &#34;folder&#34;:
             return self._get_folder_parts()
@@ -5457,10 +5453,8 @@ <h3>Methods</h3>
         parts = []
         with ZipFile(self.path) as zf:
             for name in zf.namelist():
-                if name.endswith(&#34;/&#34;):
-                    parts.append(PurePath(name[:-1]).as_posix() + &#34;/&#34;)
-                else:
-                    parts.append(PurePath(name).as_posix())
+                upath = normalize_path(name)
+                parts.append(upath)
         return parts
     elif self.__packaging == &#34;folder&#34;:
         return self._get_folder_parts()

diff --git a/doc/scripts/index.html b/doc/scripts/index.html
@@ -34,6 +34,10 @@ <h2 class="section-title" id="header-submodules">Sub-modules</h2>
 <dd>
 <div class="desc"></div>
 </dd>
+<dt><code class="name"><a title="odfdo.scripts.replace" href="replace.html">odfdo.scripts.replace</a></code></dt>
+<dd>
+<div class="desc"></div>
+</dd>
 <dt><code class="name"><a title="odfdo.scripts.show" href="show.html">odfdo.scripts.show</a></code></dt>
 <dd>
 <div class="desc"></div>
@@ -66,6 +70,7 @@ <h1>Index</h1>
 <ul>
 <li><code><a title="odfdo.scripts.diff" href="diff.html">odfdo.scripts.diff</a></code></li>
 <li><code><a title="odfdo.scripts.folder" href="folder.html">odfdo.scripts.folder</a></code></li>
+<li><code><a title="odfdo.scripts.replace" href="replace.html">odfdo.scripts.replace</a></code></li>
 <li><code><a title="odfdo.scripts.show" href="show.html">odfdo.scripts.show</a></code></li>
 <li><code><a title="odfdo.scripts.styles" href="styles.html">odfdo.scripts.styles</a></code></li>
 </ul>