From 585a2d33d4d1482522be521331b140fa43381704 Mon Sep 17 00:00:00 2001
From: x512 <x512@pm.me>
Date: Sat, 23 Dec 2023 19:52:13 +1100
Subject: [PATCH 01/17] `drop_rf()` calls `get_factors()` if needed.
 ``README.md`` cleanup.

---
 .gitignore                  |   8 +-
 README.md                   | 207 +++++++++++++++++++-----------------
 getfactormodels/__main__.py |   5 +
 pyproject.toml              |   7 +-
 4 files changed, 120 insertions(+), 107 deletions(-)
diff --git a/.gitignore b/.gitignore
index 9bc9200..378607e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -28,16 +28,12 @@ lib64/
 MANIFEST
 sdist/
 var/
-venv.bak/
-venv/
+*venv.bak/
+*venv/
 wheels/
-
 .nox/
-.tox/
 .vscode/
 .ruff_cache
 .cache/
-*_venv/
-
 **/*.csv
 **/*.xlsx
\ No newline at end of file
diff --git a/README.md b/README.md
index 54a86f0..69f6d58 100644
--- a/README.md
+++ b/README.md
@@ -3,6 +3,7 @@
 # getfactormodels
 
 ![Python 3.11](https://img.shields.io/badge/Python-3.7+-306998.svg?logo=python&logoColor=ffde57&style=flat-square) ![PyPI - Version](https://img.shields.io/pypi/v/getfactormodels?style=flat-square&label=PyPI)
+![PyPI - Status](https://img.shields.io/pypi/status/getfactormodels?style=flat-square)
 
 
 Reliably retrieve data for various multi-factor asset pricing models.
@@ -14,7 +15,7 @@ Reliably retrieve data for various multi-factor asset pricing models.
 - Pastor and Stambaugh's liquidity factors <sup>[[5]](#5)</sup>
 - Mispricing factors of Stambaugh and Yuan<sup>[[6]](#6)</sup>
 - The $q$*-factor* model of Hou, Mo, Xue and Zhang<sup>[[7]](#7)</sup>
-- The augmented $q^5$*-factor* model of  Hou, Mo, Xue and Zhang<sup>[[8]](#8)</sup>
+- The augmented $q^5$*-factor* model of  Hou, Xue and Zhang<sup>[[8]](#8)</sup>
 - *Intermediary Capital Ratio* (ICR) of He, Kelly & Manela<sup>[[9]](#9)</sup>
 - The *DHS behavioural factors* of Daniel, Hirshleifer & Sun<sup>[[10]](#10)</sup>
 - The *HML* $^{DEVIL}$ factor of Asness & Frazzini<sup>[[11]](#11)</sup>
@@ -27,141 +28,134 @@ _Thanks to: Kenneth French, Robert Stambaugh, Lin Sun, Zhiguo He, AQR Capital Ma
 
 `getfactormodels` requires Python ``>=3.7``
 
-* Install with pip:
-  ```shell
-    pip install getfactormodels   
+* The easiest way to install getfactormodels is via pip:
+  
+  ```
+  $ pip install getfactormodels
   ```
 
 ## Usage
 
->[!WARNING]
->Please be aware that `getfactormodels` was recently released (Dec 20, 2023) and is not stable while this message is displayed.
->
-#### Python
+>[!IMPORTANT]
+>``getfactormodels`` is new. It was released on December 20, 2023. Don't rely on it for anything.
 
-After installing, import ``getfactormodels`` and call ``get_factors()`` with the ``model`` and ``frequency`` parameters. Optionally, specify a ``start_date`` and ``end_date``
-* For example, to retrieve the daily q-factor model data:
+After installation, import and call the ``get_factors()`` function with the ``model`` and ``frequency`` params:
 
-    ```py
-      import getfactormodels
-    
-      getfactormodels.get_factors(model='q', frequency='d')
-    ```
-    > _Trimmed output:_
-    ```txt
-    > df
-                 Mkt-RF      R_ME      R_IA     R_ROE      R_EG        RF
-    date                                                                  
-    1967-01-03  0.000778  0.004944  0.001437 -0.007118 -0.008563  0.000187
-    1967-01-04  0.001667 -0.003487 -0.000631 -0.002044 -0.000295  0.000187
-    1967-01-05  0.012990  0.004412 -0.005688  0.000838 -0.003075  0.000187
-    1967-01-06  0.007230  0.006669  0.008897  0.003603  0.002669  0.000187
-    1967-01-09  0.008439  0.006315  0.000331  0.004949  0.002979  0.000187
-    ...              ...       ...       ...       ...       ...       ...
-    2022-12-23  0.005113 -0.001045  0.004000  0.010484  0.003852  0.000161
-    2022-12-27 -0.005076 -0.001407  0.010190  0.009206  0.003908  0.000161
-    2022-12-28 -0.012344 -0.004354  0.000133 -0.010457 -0.004953  0.000161
-    2022-12-29  0.018699  0.008568 -0.008801 -0.012686 -0.002162  0.000161
-    2022-12-30 -0.002169  0.001840  0.001011 -0.004151 -0.003282  0.000161
-
-     [14096 rows x 6 columns]
-    ```
-
-    * or, retreive the monthly liquidity factors of Pastor and Stambaugh for the 1990s:
-
-    ```py
-      import getfactormodels as getfactormodels
-    
-      df = getfactormodels.get_factors(model='liquidity', frequency='m', start_date='1990-01-01', end_date='1999-12-31')
-    ```
-    > If you don't have time to type `liquidity`, type `liq`, or `ps`--there's a handy regex.
+* For example, retrieving the monthly ${q}^{5}$ factor model:
+  
+  ```py
+   import getfactormodels
+  
+   data = getfactormodels.get_factors(model='q', frequency='m')
+  ```
 
-    * or, saving the monthly 3-factor model of Fama & French to a file:
+  > _Trimmed output:_
+
+  ```txt
+  print(data)
+                Mkt-RF      R_ME      R_IA     R_ROE      R_EG        RF
+  date                                                                  
+  1967-01-03  0.000778  0.004944  0.001437 -0.007118 -0.008563  0.000187
+  1967-01-04  0.001667 -0.003487 -0.000631 -0.002044 -0.000295  0.000187
+  1967-01-05  0.012990  0.004412 -0.005688  0.000838 -0.003075  0.000187
+  1967-01-06  0.007230  0.006669  0.008897  0.003603  0.002669  0.000187
+  1967-01-09  0.008439  0.006315  0.000331  0.004949  0.002979  0.000187
+  ...              ...       ...       ...       ...       ...       ...
+  2022-12-23  0.005113 -0.001045  0.004000  0.010484  0.003852  0.000161
+  2022-12-27 -0.005076 -0.001407  0.010190  0.009206  0.003908  0.000161
+  2022-12-28 -0.012344 -0.004354  0.000133 -0.010457 -0.004953  0.000161
+  2022-12-29  0.018699  0.008568 -0.008801 -0.012686 -0.002162  0.000161
+  2022-12-30 -0.002169  0.001840  0.001011 -0.004151 -0.003282  0.000161
+
+    [14096 rows x 6 columns]
+  ```
 
-    ```py
-      import getfactormodels as gfm
+* Retrieving the daily data for the Fama-French 3-factor model, since `start_date`:
 
-      df = gfm.get_factors(model='ff3', frequency='m', output="ff3_data.csv")
-    ```
-     >The output parameter accepts a filename, path or directory, and can be one of csv, md, txt, xlsx, pkl.
+  ```py
+  import getfactormodels as gfm
 
-* You can also import just the models that you need.:
+  df = gfm.get_factors(model='ff3', frequency='d', start_date=`2006-01-01`)
+  ```
 
-  * For example, to import only the *ICR* and *q*-factor models: 
+* Retrieving data for Stambaugh and Yuan's monthly *Mispricing* factors, between `start_date` and `end_date`, and saving the data to a file:
 
-    ```py
-      from getfactormodels import icr_factors, q_factors
+  ```py
+  import getfactormodels as gfm
+  
+  df = gfm.get_factors(model='mispricing', start_date='1970-01-01', end_date=1999-12-31, output='mispricing_factors.csv')
+  ```
 
-      # Passing a model function with no params defaults to monthly.
-      df = icr_factors()
+  >``output`` can be a filename, directory, or path. If no extension is specified, defaults to .csv (can be one of: .xlsx, .csv, .txt, .pkl, .md)
 
-      # The 'q' models, and the 3-factor model of Fama-French also have weekly data.
-      df = q_factors(frequency="W", start_date="1992-01-01)
-    ```
+You can import only the models that you need:
 
-  * If using ``ff_factors()``, then an additional ``model`` parameter should be specified:
+* For example, to import only the *ICR* and *q-factor* models:
 
-    ```py
-    from getfactormodels import ff_factors
+  ```py
+  from getfactormodels import icr_factors, q_factors
 
-    # To get annual data for the 5-factor model:
-    data = ff_factors(model="5", frequency="Y", output=".xlsx")
+  # Passing a model function without params defaults to monthly data.
+  df = icr_factors()
 
-    # Daily 3-factor model data, since 1970 (not specifying an end date
-    # will return data up until today):
-    data = ff_factors(model="3", frequency="D", start_date="1970-01-01")
-    ```
-    > Output allows just an extension to be specified.
+  # The 'q' models, and the 3-factor model of Fama-French have weekly data available:
+  df = q_factors(frequency="W", start_date="1992-01-01", output='.xlsx')
+  ```
 
-* or import all the models:
+  >``output`` allows just a file extension (with the `.`, else it'll be passed as a filename).
 
-  ```py
-    from getfactormodels.models import models
+* When using `ff_factors()`, specify an additional `model` parameter (**this might be changed**):
   
-    df = models.barillas_shanken_factors('m')
+  ```py
+  # To get annual data for the 5-factor model:
+  data = ff_factors(model="5", frequency="Y", output=".xlsx")
+
+  # Daily 3-factor model data, since 1970 (not specifying an end date
+  # will return data up until today):
+  data = ff_factors(model="3", frequency="D", start_date="1970-01-01")
   ```
 
-* There's also the `FactorExtractor` class that the CLI uses (it doesn't really do a whole lot yet):
 
-  ```python
-    from getfactormodels import FactorExtractor
+There's also a ``FactorExtractor`` class (which doesn't do much yet, it's mainly used by the CLI; lots to do):
+  ```
+  from getfactormodels import FactorExtractor
 
-    fe = FactorExtractor(model='carhart', frequency='m', start_date='1980-01-01', end_date='1980-05-01')
-    fe.get_factors()
-    fe.to_file('carhart_factors.md')
-    ```
+  fe = FactorExtractor(model='carhart', start_date='1980-01-01', end_date='1980-05-01)
+  fe.get_factors()
+  fe.drop_rf() 
+  fe.to_file('~/carhart_factors.md')
+  ```
 
-  * _The resulting ``carhart_factors.md`` file will look like this:_
+* _The resulting ``carhart_factors.md`` file will look like this:_
     
-    | date                |   Mkt-RF |     SMB |     HML |     MOM |     RF |
-    |:--------------------|---------:|--------:|--------:|--------:|-------:|
-    | 1980-01-31 00:00:00 |   0.0551 |  0.0162 |  0.0175 |  0.0755 | 0.008  |
-    | 1980-02-29 00:00:00 |  -0.0122 | -0.0185 |  0.0061 |  0.0788 | 0.0089 |
-    | 1980-03-31 00:00:00 |  -0.129  | -0.0664 | -0.0101 | -0.0955 | 0.0121 |
-    | 1980-04-30 00:00:00 |   0.0397 |  0.0105 |  0.0106 | -0.0043 | 0.0126 |
+  | date                |   Mkt-RF |     SMB |     HML |     MOM |
+  |:--------------------|---------:|--------:|--------:|--------:|
+  | 1980-01-31 00:00:00 |   0.0551 |  0.0162 |  0.0175 |  0.0755 |
+  | 1980-02-29 00:00:00 |  -0.0122 | -0.0185 |  0.0061 |  0.0788 |
+  | 1980-03-31 00:00:00 |  -0.129  | -0.0664 | -0.0101 | -0.0955 |
+  | 1980-04-30 00:00:00 |   0.0397 |  0.0105 |  0.0106 | -0.0043 |
 
 
-#### Using the CLI
-* You can also use getfactormodels from the command line.
+### CLI
+``bash >=4.2``
+* You can also use getfactormodels from the command line. It's very barebones, here's the `-h` (there is no `--help` yet)
 
-    ```bash
-    $ getfactormodels -h
+  ```bash
+  $ getfactormodels -h
 
-    usage: getfactormodels [-h] -m MODEL [-f FREQ] [-s START] [-e END] [-o OUTPUT] [--no_rf]
-    ```
+  usage: getfactormodels [-h] -m MODEL [-f FREQ] [-s START] [-e END] [-o OUTPUT] [--no_rf]
+  ```
 
 * An example of how to use the CLI to retrieve the Fama-French 3-factor model data:
-    ```bash
-       getfactormodels --model ff3 --frequency M --start-date 1960-01-01 --end-date 2020-12-31 --output "filename.csv"
-    ```
-    > Accepted file extensions are .csv, .txt, .xlsx, and .md. If no extension is given, the output file will be .csv. The --output flag allows a filename, filepath or a directory. If only an extension is provided (including the . else it'll be passed as a filename), a name will be generated.
-    
-* Here's another example that retrieves the annual Fama-French 5-factor data without the RF column:
+  ```bash
+  $ getfactormodels --model ff3 --frequency M --start-date 1960-01-01 --end-date 2020-12-31 --output ".csv"
+  ```
+
+* Here's another example that retrieves the annual Fama-French 5-factor data without the RF column (using ``--no_rf``)
 
   ```sh
-    getfactormodels -m 5 -f Y -s 1960-01-01 -e 2020-12-31 --no_rf -o ~/some_dir/filename.xlsx
+  $ getfactormodels -m ff5 -f Y -s 1960-01-01 -e 2020-12-31 --no_rf -o ~/some_dir/filename.xlsx
   ```
-    > `--no_rf` will return the factor model without an RF column.
 
 ## References
 1. <a id="1"></a> E. F. Fama and K. R. French, ‘Common risk factors in the returns on stocks and bonds’, *Journal of Financial Economics*, vol. 33, no. 1, pp. 3–56, 1993. [PDF](https://people.duke.edu/~charvey/Teaching/BA453_2006/FF_Common_risk.pdf)
@@ -200,3 +194,16 @@ After installing, import ``getfactormodels`` and call ``get_factors()`` with the
 
 [![Imports: isort](https://img.shields.io/badge/%20imports-isort-%231674b1?style=flat-square&labelColor=ef8336)](https://pycqa.github.io/isort/)
 [![Ruff](https://img.shields.io/badge/-ruff-%23261230?style=flat-square&logo=ruff&logoColor=d7ff64)](https://simpleicons.org/?q=ruff)
+---
+
+#### Known issues
+
+* The first `hml_devil_factors()` retrieval is slow, because the download from aqr.com is slow. It's the only model, so far, implementing a cache—daily data expires at the end of the day and is only re-downloaded when the requested`end_date` exceeds the file's last index date; similar for monthly, expiring at EOM and re-downloaded when needed.
+
+
+#### Todo
+
+- [ ] Docs
+  - [ ] Examples
+- [ ] Tests
+- Error handling
\ No newline at end of file
diff --git a/getfactormodels/__main__.py b/getfactormodels/__main__.py
index e54ceb4..c8cf40e 100755
--- a/getfactormodels/__main__.py
+++ b/getfactormodels/__main__.py
@@ -58,6 +58,7 @@ def get_factors(model: str = "3",
         raise ValueError(f"Invalid model: {model}")
 
     df = function(frequency, start_date, end_date, output)
+
     return df
 
 
@@ -125,6 +126,10 @@ def get_factors(self) -> pd.DataFrame:
 
     def drop_rf(self, df):
         """Drop the ``RF`` column from the DataFrame."""
+        # get_factors if not already done
+        if df is None:
+            df = self.get_factors()
+
         if "RF" in df.columns:
             df = df.drop(columns=["RF"])
         else:
diff --git a/pyproject.toml b/pyproject.toml
index 3799467..c2043f0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -12,7 +12,12 @@ classifiers = [
     "Topic :: Office/Business :: Financial :: Investment",
     "Topic :: Scientific/Engineering :: Mathematics",
     "Topic :: Scientific/Engineering :: Information Analysis",
+    "Programming Language :: Python :: 3.7",
+    "Programming Language :: Python :: 3.8",
+    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
     "Programming Language :: Python :: 3 :: Only",
     "Operating System :: OS Independent",
     "Environment :: Console",
@@ -79,7 +84,7 @@ select = ["E4", "E7", "E9", "F", "B",
         # "C901" (McCabe complexity)
 
 # Undo soon:
-ignore = ["PD901",]  # generic variable name `df` for DataFrames
+ignore = ["PD901",]  # TODO: rename the generic `df` variables used for DataFrames
 
 # see: docs.astral.sh/ruff/configuration/
 fixable = ["W29", "W5", "E241", "E261"]

From c2857aaa79e4a79d96b39fa26c628854ed6a400c Mon Sep 17 00:00:00 2001
From: x512 <x512@pm.me>
Date: Sat, 23 Dec 2023 19:58:58 +1100
Subject: [PATCH 02/17] fix: syntax highlights

---
 README.md | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/README.md b/README.md
index 69f6d58..c690a74 100644
--- a/README.md
+++ b/README.md
@@ -30,20 +30,20 @@ _Thanks to: Kenneth French, Robert Stambaugh, Lin Sun, Zhiguo He, AQR Capital Ma
 
 * The easiest way to install getfactormodels is via pip:
   
-  ```
+  ```shell
   $ pip install getfactormodels
   ```
 
 ## Usage
 
 >[!IMPORTANT]
->``getfactormodels`` is new. It was released on December 20, 2023. Don't rely on it for anything.
+>![PyPI - Status](https://img.shields.io/pypi/status/getfactormodels?style=flat-square)``getfactormodels`` is new. It was released on December 20, 2023. Don't rely on it for anything.![PyPI - Status](https://img.shields.io/pypi/status/getfactormodels?style=flat-square)
 
 After installation, import and call the ``get_factors()`` function with the ``model`` and ``frequency`` params:
 
 * For example, retrieving the monthly ${q}^{5}$ factor model:
   
-  ```py
+  ```python
    import getfactormodels
   
    data = getfactormodels.get_factors(model='q', frequency='m')
@@ -72,7 +72,7 @@ After installation, import and call the ``get_factors()`` function with the ``mo
 
 * Retrieving the daily data for the Fama-French 3-factor model, since `start_date`:
 
-  ```py
+  ```python
   import getfactormodels as gfm
 
   df = gfm.get_factors(model='ff3', frequency='d', start_date=`2006-01-01`)
@@ -80,7 +80,7 @@ After installation, import and call the ``get_factors()`` function with the ``mo
 
 * Retrieving data for Stambaugh and Yuan's monthly *Mispricing* factors, between `start_date` and `end_date`, and saving the data to a file:
 
-  ```py
+  ```python
   import getfactormodels as gfm
   
   df = gfm.get_factors(model='mispricing', start_date='1970-01-01', end_date=1999-12-31, output='mispricing_factors.csv')
@@ -92,7 +92,7 @@ You can import only the models that you need:
 
 * For example, to import only the *ICR* and *q-factor* models:
 
-  ```py
+  ```python
   from getfactormodels import icr_factors, q_factors
 
   # Passing a model function without params defaults to monthly data.
@@ -106,7 +106,7 @@ You can import only the models that you need:
 
 * When using `ff_factors()`, specify an additional `model` parameter (**this might be changed**):
   
-  ```py
+  ```python
   # To get annual data for the 5-factor model:
   data = ff_factors(model="5", frequency="Y", output=".xlsx")
 
@@ -117,7 +117,7 @@ You can import only the models that you need:
 
 
 There's also a ``FactorExtractor`` class (which doesn't do much yet, it's mainly used by the CLI; lots to do):
-  ```
+  ```python
   from getfactormodels import FactorExtractor
 
   fe = FactorExtractor(model='carhart', start_date='1980-01-01', end_date='1980-05-01)
@@ -140,20 +140,20 @@ There's also a ``FactorExtractor`` class (which doesn't do much yet, it's mainly
 ``bash >=4.2``
 * You can also use getfactormodels from the command line. It's very barebones, here's the `-h` (there is no `--help` yet)
 
-  ```bash
+  ```shell
   $ getfactormodels -h
 
   usage: getfactormodels [-h] -m MODEL [-f FREQ] [-s START] [-e END] [-o OUTPUT] [--no_rf]
   ```
 
 * An example of how to use the CLI to retrieve the Fama-French 3-factor model data:
-  ```bash
+  ```shell
   $ getfactormodels --model ff3 --frequency M --start-date 1960-01-01 --end-date 2020-12-31 --output ".csv"
   ```
 
 * Here's another example that retrieves the annual Fama-French 5-factor data without the RF column (using ``--no_rf``)
 
-  ```sh
+  ```shell
   $ getfactormodels -m ff5 -f Y -s 1960-01-01 -e 2020-12-31 --no_rf -o ~/some_dir/filename.xlsx
   ```
 

From f8083d77945b875d67bc14c181d12b7925515f0d Mon Sep 17 00:00:00 2001
From: x512 <x512@pm.me>
Date: Sat, 23 Dec 2023 20:02:20 +1100
Subject: [PATCH 03/17] clean

---
 README.md | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index c690a74..1e6b2a2 100644
--- a/README.md
+++ b/README.md
@@ -37,7 +37,8 @@ _Thanks to: Kenneth French, Robert Stambaugh, Lin Sun, Zhiguo He, AQR Capital Ma
 ## Usage
 
 >[!IMPORTANT]
->![PyPI - Status](https://img.shields.io/pypi/status/getfactormodels?style=flat-square)``getfactormodels`` is new. It was released on December 20, 2023. Don't rely on it for anything.![PyPI - Status](https://img.shields.io/pypi/status/getfactormodels?style=flat-square)
+>![PyPI - Status](https://img.shields.io/pypi/status/getfactormodels?style=flat-square)
+>``getfactormodels`` is new. It was released on December 20, 2023. Don't rely on it for anything.
 
 After installation, import and call the ``get_factors()`` function with the ``model`` and ``frequency`` params:
 
@@ -115,8 +116,8 @@ You can import only the models that you need:
   data = ff_factors(model="3", frequency="D", start_date="1970-01-01")
   ```
 
-
 There's also a ``FactorExtractor`` class (which doesn't do much yet, it's mainly used by the CLI; lots to do):
+
   ```python
   from getfactormodels import FactorExtractor
 
@@ -137,7 +138,9 @@ There's also a ``FactorExtractor`` class (which doesn't do much yet, it's mainly
 
 
 ### CLI
+
 ``bash >=4.2``
+
 * You can also use getfactormodels from the command line. It's very barebones, here's the `-h` (there is no `--help` yet)
 
   ```shell
@@ -147,6 +150,7 @@ There's also a ``FactorExtractor`` class (which doesn't do much yet, it's mainly
   ```
 
 * An example of how to use the CLI to retrieve the Fama-French 3-factor model data:
+
   ```shell
   $ getfactormodels --model ff3 --frequency M --start-date 1960-01-01 --end-date 2020-12-31 --output ".csv"
   ```

From 25be8323f3dbc3cfd44ee1f20340ca9e7276ae84 Mon Sep 17 00:00:00 2001
From: x512 <x512@pm.me>
Date: Sat, 23 Dec 2023 22:15:26 +1100
Subject: [PATCH 04/17] fix: drop NaN rows in ``hml_devil_factors()``; README and merge cleanups

---
 README.md                        | 13 +++++++++----
 getfactormodels/models/models.py | 13 +++++++++----
 2 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/README.md b/README.md
index 1e6b2a2..8cec408 100644
--- a/README.md
+++ b/README.md
@@ -15,7 +15,7 @@ Reliably retrieve data for various multi-factor asset pricing models.
 - Pastor and Stambaugh's liquidity factors <sup>[[5]](#5)</sup>
 - Mispricing factors of Stambaugh and Yuan<sup>[[6]](#6)</sup>
 - The $q$*-factor* model of Hou, Mo, Xue and Zhang<sup>[[7]](#7)</sup>
-- The augmented $q^5$*-factor* model of  Hou, Xue and Zhang<sup>[[8]](#8)</sup>
+- The augmented $q^5$*-factor* model of  Hou, Mo, Xue and Zhang<sup>[[8]](#8)</sup>
 - *Intermediary Capital Ratio* (ICR) of He, Kelly & Manela<sup>[[9]](#9)</sup>
 - The *DHS behavioural factors* of Daniel, Hirshleifer & Sun<sup>[[10]](#10)</sup>
 - The *HML* $^{DEVIL}$ factor of Asness & Frazzini<sup>[[11]](#11)</sup>
@@ -38,6 +38,7 @@ _Thanks to: Kenneth French, Robert Stambaugh, Lin Sun, Zhiguo He, AQR Capital Ma
 
 >[!IMPORTANT]
 >![PyPI - Status](https://img.shields.io/pypi/status/getfactormodels?style=flat-square)
+
 >``getfactormodels`` is new. It was released on December 20, 2023. Don't rely on it for anything.
 
 After installation, import and call the ``get_factors()`` function with the ``model`` and ``frequency`` params:
@@ -53,7 +54,7 @@ After installation, import and call the ``get_factors()`` function with the ``mo
   > _Trimmed output:_
 
   ```txt
-  print(data)
+  > print(data)
                 Mkt-RF      R_ME      R_IA     R_ROE      R_EG        RF
   date                                                                  
   1967-01-03  0.000778  0.004944  0.001437 -0.007118 -0.008563  0.000187
@@ -161,6 +162,10 @@ There's also a ``FactorExtractor`` class (which doesn't do much yet, it's mainly
   $ getfactormodels -m ff5 -f Y -s 1960-01-01 -e 2020-12-31 --no_rf -o ~/some_dir/filename.xlsx
   ```
 
+## Data Availability
+
+>[TODO]
+
 ## References
 1. <a id="1"></a> E. F. Fama and K. R. French, ‘Common risk factors in the returns on stocks and bonds’, *Journal of Financial Economics*, vol. 33, no. 1, pp. 3–56, 1993. [PDF](https://people.duke.edu/~charvey/Teaching/BA453_2006/FF_Common_risk.pdf)
 2. <a id="2"></a> M. Carhart, ‘On Persistence in Mutual Fund Performance’, *Journal of Finance*, vol. 52, no. 1, pp. 57–82, 1997. [PDF](https://onlinelibrary.wiley.com/doi/full/10.1111/j.1540-6261.1997.tb03808.x)
@@ -202,7 +207,7 @@ There's also a ``FactorExtractor`` class (which doesn't do much yet, it's mainly
 
 #### Known issues
 
-* The first `hml_devil_factors()` retrieval is slow, because the download from aqr.com is slow. It's the only model, so far, implementing a cache—daily data expires at the end of the day and is only re-downloaded when the requested`end_date` exceeds the file's last index date; similar for monthly, expiring at EOM and re-downloaded when needed.
+* The first `hml_devil_factors()` retrieval is slow, because the download from aqr.com is slow. It's the only model, so far, implementing a cache—daily data expires at the end of the day and is only re-downloaded when the requested `end_date` exceeds the file's last index date. Monthly data works the same way, expiring at the end of the month (EOM) and re-downloaded only when needed.
 
 
 #### Todo
@@ -210,4 +215,4 @@ There's also a ``FactorExtractor`` class (which doesn't do much yet, it's mainly
 - [ ] Docs
   - [ ] Examples
 - [ ] Tests
-- Error handling
\ No newline at end of file
+- [ ] Error handling
\ No newline at end of file
diff --git a/getfactormodels/models/models.py b/getfactormodels/models/models.py
index 341fece..87fe895 100644
--- a/getfactormodels/models/models.py
+++ b/getfactormodels/models/models.py
@@ -37,7 +37,7 @@
 # TODO: "PEP 484 prohibits implicit `Optional`" see: RUFF013.
 
 
-def ff_factors(model: str = "3",  # TODO: fix: _get_ff_factors filepath param
+def ff_factors(model: str = "3",
                frequency: str = "M",
                start_date: Optional[str] = None,
                end_date: Optional[str] = None,
@@ -356,7 +356,8 @@ def _download_hml_devil(frequency):
 
     data = pd.concat(dfs, axis=1)
     data.rename(columns={'MKT': 'Mkt-RF',
-                         'HML Devil': 'HML_DEVIL'}, inplace=True)
+                         'HML Devil': 'HML_DEVIL'})
+
     data = data.astype(float)
 
     return data
@@ -422,7 +423,11 @@ def hml_devil_factors(frequency='M',
 
     # Otherwise, compute the result and store it in the cache
     data = _get_hml_devil(frequency, start_date, end_date, output, series)
+
+    # UMD returns NaNs for 1926
+    data = data.dropna()
     cache[cache_key] = data
+
     return _process(data, start_date, end_date, filepath=output)
 
 
@@ -448,7 +453,7 @@ def barillas_shanken_factors(frequency: str = 'M',
     ff = ff_factors(model='6', frequency=frequency)[['Mkt-RF', 'SMB', 'UMD',
                                                      'RF']]
 
-    df = pd.merge(q, ff, left_index=True, right_index=True, how='inner')
+    df = q.merge(ff, left_index=True, right_index=True, how='inner')
 
     hml_devil = hml_devil_factors(frequency=frequency, start_date=start_date,
                                   series=True)
@@ -456,7 +461,7 @@ def barillas_shanken_factors(frequency: str = 'M',
     hml_devil = hml_devil.rename('HML_m')
     hml_devil.index.name = 'date'
 
-    df = pd.merge(df, hml_devil, left_index=True,
+    df = df.merge(hml_devil, left_index=True,
                   right_index=True, how='inner')
 
     return _process(df, start_date, end_date, filepath=output)

From 29ccee7e07738ef162fc421f5bcbc67debb40383 Mon Sep 17 00:00:00 2001
From: x512 <x512@pm.me>
Date: Sat, 23 Dec 2023 22:53:22 +1100
Subject: [PATCH 05/17] added: ``.drop_mkt()``, ``--nomkt``

---
 getfactormodels/__main__.py         | 25 ++++++++++++++++++++++++-
 getfactormodels/models/ff_models.py |  2 +-
 getfactormodels/utils/cli.py        |  4 +++-
 getfactormodels/utils/utils.py      |  3 ++-
 4 files changed, 30 insertions(+), 4 deletions(-)

diff --git a/getfactormodels/__main__.py b/getfactormodels/__main__.py
index c8cf40e..bccb2dc 100755
--- a/getfactormodels/__main__.py
+++ b/getfactormodels/__main__.py
@@ -2,6 +2,8 @@
 import os
 import pandas as pd
 from dateutil import parser
+from pathlib import Path
+
 # ruff: noqa: RUF100
 from getfactormodels.models.models import (barillas_shanken_factors,  # noqa: F401, E501
                                            carhart_factors, dhs_factors,
@@ -98,6 +100,10 @@ def no_rf(self):
         """Sets the _no_rf flag to True."""
         self._no_rf = True
 
+    def no_mkt(self):
+        """Set ``_no_mkt`` so ``get_factors()`` drops the ``Mkt-RF`` column."""
+        self._no_mkt = True
+
     @staticmethod
     def validate_date_format(date_string):
         """
@@ -121,6 +127,8 @@ def get_factors(self) -> pd.DataFrame:
 
         if self._no_rf:
             self.df = self.drop_rf(self.df)
+        if self._no_mkt:
+            self.df = self.drop_mkt(self.df)
 
         return self.df
 
@@ -137,6 +145,18 @@ def drop_rf(self, df):
 
         return df
 
+    def drop_mkt(self, df):
+        """Drop ``Mkt-RF`` from ``df`` (fetched first if ``df`` is None)."""
+        # No frame supplied: fall back to this extractor's own factor data.
+        frame = self.get_factors() if df is None else df
+
+        if "Mkt-RF" not in frame.columns:
+            print("`drop_mkt` was called but no MKT column was found.")
+            return frame
+
+        # ``drop`` returns a new frame; the one passed in is left unmodified.
+        return frame.drop(columns=["Mkt-RF"])
+
     def to_file(self, filename):
         """
         Save the factor data to a file.
@@ -158,12 +178,15 @@ def main():
                                 start_date=args.start, end_date=args.end)
     if args.no_rf:
         extractor.no_rf()
+    elif args.no_mkt:
+        extractor.no_mkt()
 
     df = extractor.get_factors()
 
     if args.output:
         extractor.to_file(args.output)
-        print(f'File saved to "{os.path.abspath(args.output)}"')
+        print(f'File saved to "{Path(args.output).resolve()}"')
+
     else:
         print(df)
 
diff --git a/getfactormodels/models/ff_models.py b/getfactormodels/models/ff_models.py
index 84a9228..5ccdcd4 100644
--- a/getfactormodels/models/ff_models.py
+++ b/getfactormodels/models/ff_models.py
@@ -21,7 +21,7 @@
 
 """
 import numpy as np
-import pandas as pd  # noqa: D100
+import pandas as pd
 from ..utils.utils import (  # noqa - todo: fix relative import from parent modules banned
     _process, get_zip_from_url)
 
diff --git a/getfactormodels/utils/cli.py b/getfactormodels/utils/cli.py
index c785b88..bef1654 100644
--- a/getfactormodels/utils/cli.py
+++ b/getfactormodels/utils/cli.py
@@ -21,8 +21,10 @@ def parse_args():
                         help='The start date for the data.')
     parser.add_argument('-e', '--end', type=str, required=False,
                         help='The end date for the data.')
-    parser.add_argument('-o', '--output', type=str, required=False,  # noqa
+    parser.add_argument('-o', '--output', type=str, required=False,
                         help='The file to save the data to.')
     parser.add_argument('--no_rf', '--no-rf', '--norf', action='store_true',
                         help='Drop the RF column from the DataFrame.')
+    parser.add_argument('--no_mkt', '--no-mkt', '--nomkt', action='store_true',
+                        help='Drop the MKT column from the DataFrame.')
     return parser.parse_args()
diff --git a/getfactormodels/utils/utils.py b/getfactormodels/utils/utils.py
index 9cb928c..60a78d7 100644
--- a/getfactormodels/utils/utils.py
+++ b/getfactormodels/utils/utils.py
@@ -80,7 +80,8 @@ def _save_to_file(data, filename=None, output_dir=None):
             '.md': data.to_markdown, }
 
         if filename is None:
-            filename = datetime.now().strftime('%Y_%m_%d-%H%M') + '.csv'
+            filename = datetime.now().strftime('%Y_%m_%d-%H%M') \
+                + '.csv'
         elif '.' not in filename:
             filename += '.csv'
 

From 73b7cebb9e54b7bf713c5d628ea15de826338ba1 Mon Sep 17 00:00:00 2001
From: x512 <x512@pm.me>
Date: Sat, 23 Dec 2023 23:08:14 +1100
Subject: [PATCH 06/17] fix: forgot ``_no_mkt`` in init

---
 getfactormodels/__main__.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/getfactormodels/__main__.py b/getfactormodels/__main__.py
index bccb2dc..1406969 100755
--- a/getfactormodels/__main__.py
+++ b/getfactormodels/__main__.py
@@ -94,6 +94,7 @@ def __init__(self,
             else None
         self.output = output
         self._no_rf = False
+        self._no_mkt = False
         self.df = None
 
     def no_rf(self):

From f6b2251e834b6e8049571f0bd5ebd575f058e692 Mon Sep 17 00:00:00 2001
From: x512 <x512@pm.me>
Date: Sat, 23 Dec 2023 23:13:31 +1100
Subject: [PATCH 07/17] fix: ``--no_mkt`` was ignored when ``--no_rf`` was also given

---
 getfactormodels/__main__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/getfactormodels/__main__.py b/getfactormodels/__main__.py
index 1406969..04ad040 100755
--- a/getfactormodels/__main__.py
+++ b/getfactormodels/__main__.py
@@ -179,7 +179,7 @@ def main():
                                 start_date=args.start, end_date=args.end)
     if args.no_rf:
         extractor.no_rf()
-    elif args.no_mkt:
+    if args.no_mkt:
         extractor.no_mkt()
 
     df = extractor.get_factors()

From 7a85b3317f4606a90713e0eb0c49ad0c20c30fbc Mon Sep 17 00:00:00 2001
From: x512 <x512@pm.me>
Date: Sat, 23 Dec 2023 23:16:08 +1100
Subject: [PATCH 08/17] clean: imports (isort)

---
 getfactormodels/__main__.py      | 9 ++++-----
 getfactormodels/models/models.py | 1 +
 noxfile.py                       | 1 +
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/getfactormodels/__main__.py b/getfactormodels/__main__.py
index 04ad040..349a693 100755
--- a/getfactormodels/__main__.py
+++ b/getfactormodels/__main__.py
@@ -1,12 +1,11 @@
 # -*- coding: utf-8 -*-
-import os
+from pathlib import Path
 import pandas as pd
 from dateutil import parser
-from pathlib import Path
-
 # ruff: noqa: RUF100
-from getfactormodels.models.models import (barillas_shanken_factors,  # noqa: F401, E501
-                                           carhart_factors, dhs_factors,
+from getfactormodels.models.models import \
+    barillas_shanken_factors  # noqa: F401, E501
+from getfactormodels.models.models import (carhart_factors, dhs_factors,
                                            ff_factors, hml_devil_factors,
                                            icr_factors, liquidity_factors,
                                            mispricing_factors,
diff --git a/getfactormodels/models/models.py b/getfactormodels/models/models.py
index 87fe895..effac33 100644
--- a/getfactormodels/models/models.py
+++ b/getfactormodels/models/models.py
@@ -34,6 +34,7 @@
 import requests
 from getfactormodels.utils.utils import _process, get_file_from_url
 from .ff_models import _get_ff_factors
+
 # TODO: "PEP 484 prohibits implicit `Optional`" see: RUFF013.
 
 
diff --git a/noxfile.py b/noxfile.py
index 8c4db6e..ac13212 100644
--- a/noxfile.py
+++ b/noxfile.py
@@ -1,4 +1,5 @@
 import nox
+
 # TODO: mypy
 
 

From 88b995c9a7165131561af1507534b7d02a20d8fb Mon Sep 17 00:00:00 2001
From: x512 <x512@pm.me>
Date: Sun, 24 Dec 2023 03:33:06 +1100
Subject: [PATCH 09/17] add type hints, clean up. TODO:
 ``hml_devil_factors()`` isn't using the cache in the CLI!

---
 README.md                           | 13 +++---
 getfactormodels/__init__.py         |  6 +--
 getfactormodels/__main__.py         | 35 +++++++++--------
 getfactormodels/models/ff_models.py | 39 +++++++++++-------
 getfactormodels/models/models.py    | 61 ++++++++++++++---------------
 getfactormodels/utils/cli.py        |  4 +-
 getfactormodels/utils/utils.py      |  5 ++-
 pyproject.toml                      | 25 ++++++------
 8 files changed, 103 insertions(+), 85 deletions(-)

diff --git a/README.md b/README.md
index 8cec408..8d76048 100644
--- a/README.md
+++ b/README.md
@@ -69,7 +69,7 @@ After installation, import and call the ``get_factors()`` function with the ``mo
   2022-12-29  0.018699  0.008568 -0.008801 -0.012686 -0.002162  0.000161
   2022-12-30 -0.002169  0.001840  0.001011 -0.004151 -0.003282  0.000161
 
-    [14096 rows x 6 columns]
+  [14096 rows x 6 columns]
   ```
 
 * Retrieving the daily data for the Fama-French 3-factor model, since `start_date`:
@@ -137,17 +137,18 @@ There's also a ``FactorExtractor`` class (which doesn't do much yet, it's mainly
   | 1980-03-31 00:00:00 |  -0.129  | -0.0664 | -0.0101 | -0.0955 |
   | 1980-04-30 00:00:00 |   0.0397 |  0.0105 |  0.0106 | -0.0043 |
 
+>``.drop_rf()`` will return the DataFrame without the `RF` column. You can also drop the `Mkt-RF` column with ``.drop_mkt()``.
 
 ### CLI
 
 ``bash >=4.2``
 
-* You can also use getfactormodels from the command line. It's very barebones, here's the `-h` (there is no `--help` yet)
+* You can also use getfactormodels from the command line. It's very barebones; here's the `-h` output:
 
   ```shell
   $ getfactormodels -h
 
-  usage: getfactormodels [-h] -m MODEL [-f FREQ] [-s START] [-e END] [-o OUTPUT] [--no_rf]
+  usage: getfactormodels [-h] -m MODEL [-f FREQ] [-s START] [-e END] [-o OUTPUT] [--no_rf] [--no_mkt]
   ```
 
 * An example of how to use the CLI to retrieve the Fama-French 3-factor model data:
@@ -161,7 +162,8 @@ There's also a ``FactorExtractor`` class (which doesn't do much yet, it's mainly
   ```shell
   $ getfactormodels -m ff5 -f Y -s 1960-01-01 -e 2020-12-31 --no_rf -o ~/some_dir/filename.xlsx
   ```
-
+* To return the factors without the risk-free rate (`RF`) and/or the excess market return (`Mkt-RF`) columns, pass `--no_rf` and/or `--no_mkt`.
+  
 ## Data Availability
 
 >[TODO]
@@ -209,10 +211,9 @@ There's also a ``FactorExtractor`` class (which doesn't do much yet, it's mainly
 
 * The first `hml_devil_factors()` retrieval is slow, because the download from aqr.com is slow. It's the only model, so far, implementing a cache—daily data expires at the end of the day and is only re-downloaded when the requested`end_date` exceeds the file's last index date. Similar for monthly, expiring at EOM and re-downloaded when needed.
 
-
 #### Todo
 
 - [ ] Docs
   - [ ] Examples
 - [ ] Tests
-- [ ] Error handling
\ No newline at end of file
+- [ ] Error handling
diff --git a/getfactormodels/__init__.py b/getfactormodels/__init__.py
index 91ca3f1..f7d785e 100644
--- a/getfactormodels/__init__.py
+++ b/getfactormodels/__init__.py
@@ -10,8 +10,8 @@
 # copies of the Software, and to permit persons to whom the Software is
 # furnished to do so, subject to the following conditions:
 #
-# The above copyright notice and this permission notice shall be included in all
-# copies or substantial portions of the Software.
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
 #
 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
@@ -23,7 +23,7 @@
 __version__ = "0.0.3"
 
 from .__main__ import FactorExtractor, get_factors
-from .models import models  # noqa: F401
+from .models import models  # noqa: F401, RUF100 (silent flake8 in VScode)
 from .models.models import (barillas_shanken_factors, carhart_factors,
                             dhs_factors, ff_factors, hml_devil_factors,
                             icr_factors, liquidity_factors, mispricing_factors,
diff --git a/getfactormodels/__main__.py b/getfactormodels/__main__.py
index 349a693..2378e49 100755
--- a/getfactormodels/__main__.py
+++ b/getfactormodels/__main__.py
@@ -1,24 +1,26 @@
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 from pathlib import Path
 import pandas as pd
 from dateutil import parser
 # ruff: noqa: RUF100
 from getfactormodels.models.models import \
-    barillas_shanken_factors  # noqa: F401, E501
-from getfactormodels.models.models import (carhart_factors, dhs_factors,
+    barillas_shanken_factors  # noqa: F401
+from getfactormodels.models.models import (carhart_factors, dhs_factors,  # noqa: F401, E501
                                            ff_factors, hml_devil_factors,
                                            icr_factors, liquidity_factors,
                                            mispricing_factors,
                                            q_classic_factors, q_factors)
 from getfactormodels.utils.cli import parse_args
 from getfactormodels.utils.utils import _get_model_key, _process
+from typing import Optional
 
 
 def get_factors(model: str = "3",
-                frequency: str = "M",
-                start_date=None,
-                end_date=None,
-                output=None) -> pd.DataFrame:
+                frequency: Optional[str] = "M",
+                start_date: Optional[str] = None,
+                end_date: Optional[str] = None,
+                output: Optional[str] = None) -> pd.DataFrame:
     """Get data for a specified factor model.
 
     Return a DataFrame containing the data for the specified model and
@@ -80,11 +82,11 @@ class FactorExtractor:
     """
 
     def __init__(self,
-                 model='3',
-                 frequency='M',
-                 start_date=None,
-                 end_date=None,
-                 output=None):
+                 model: str = '3',
+                 frequency: Optional[str] = 'M',
+                 start_date: Optional[str] = None,
+                 end_date: Optional[str] = None,
+                 output: Optional[str] = None):
         self.model: str = model
         self.frequency: str = frequency
         self.start_date = self.validate_date_format(start_date) if start_date \
@@ -105,7 +107,7 @@ def no_mkt(self):
         self._no_mkt = True
 
     @staticmethod
-    def validate_date_format(date_string):
+    def validate_date_format(date_string: str) -> str:
         """
         Validate the date format.
 
@@ -123,7 +125,8 @@ def get_factors(self) -> pd.DataFrame:
             model=self.model,
             frequency=self.frequency,
             start_date=self.start_date,
-            end_date=self.end_date)
+            end_date=self.end_date,
+            output=self.output)
 
         if self._no_rf:
             self.df = self.drop_rf(self.df)
@@ -132,7 +135,7 @@ def get_factors(self) -> pd.DataFrame:
 
         return self.df
 
-    def drop_rf(self, df):
+    def drop_rf(self, df: pd.DataFrame = None) -> pd.DataFrame:
         """Drop the ``RF`` column from the DataFrame."""
         # get_factors if not already done
         if df is None:
@@ -145,7 +148,7 @@ def drop_rf(self, df):
 
         return df
 
-    def drop_mkt(self, df):
+    def drop_mkt(self, df: pd.DataFrame = None) -> pd.DataFrame:
         """Drop the ``MKT`` column from the DataFrame."""
         if df is None:
             df = self.get_factors()
@@ -157,7 +160,7 @@ def drop_mkt(self, df):
 
         return df
 
-    def to_file(self, filename):
+    def to_file(self, filename: str):
         """
         Save the factor data to a file.
 
diff --git a/getfactormodels/models/ff_models.py b/getfactormodels/models/ff_models.py
index 5ccdcd4..9b82af7 100644
--- a/getfactormodels/models/ff_models.py
+++ b/getfactormodels/models/ff_models.py
@@ -20,19 +20,23 @@
     models construction.
 
 """
+# ruff: noqa: PLR2004
+from __future__ import annotations
+from typing import Optional
 import numpy as np
 import pandas as pd
 from ..utils.utils import (  # noqa - todo: fix relative import from parent modules banned
     _process, get_zip_from_url)
 
 
-def _ff_construct_url(model="3", frequency="M"):
+def _ff_construct_url(model: str = "3", frequency: str = "M") -> str:
     """Construct and return the URL for the specified model and frequency."""
     frequency = frequency.upper()
 
-    if frequency == "W" and model not in ["3", "4"]:  # why 4?
-        raise ValueError("Weekly data is only available for the Fama \
-                         French 3 factor model at the moment.")
+    if frequency == "W" and model not in ["3", "4"]:
+        error_message = "Weekly data is only available for the Fama French \
+            3 factor model at the moment."
+        raise ValueError(error_message)
 
     base_url = "https://mba.tuck.dartmouth.edu"
     ftp = "pages/faculty/ken.french/ftp"
@@ -48,7 +52,8 @@ def _ff_construct_url(model="3", frequency="M"):
     return f"{base_url}/{ftp}/{file}"
 
 
-def _ff_read_csv_from_zip(zip_file, model=None):
+def _ff_read_csv_from_zip(zip_file,
+                          model: Optional[str] = None) -> pd.DataFrame:
     """Read the FF Factors CSV into a dataframe."""
     try:
         filename = zip_file.namelist()[0]
@@ -72,7 +77,8 @@ def _ff_read_csv_from_zip(zip_file, model=None):
     return data
 
 
-def _ff_process_data(data, model, frequency) -> pd.DataFrame:
+def _ff_process_data(data: pd.DataFrame,
+                     model, frequency) -> pd.DataFrame:
     """Process and return the data based on the provided model and frequency.
     """
     frequency = frequency.lower()
@@ -105,7 +111,7 @@ def _ff_process_data(data, model, frequency) -> pd.DataFrame:
     return data
 
 
-def _ff_get_mom(frequency) -> pd.Series:
+def _ff_get_mom(frequency: str = "M") -> pd.Series:
     """Fetch and return the momentum factor data as a pd.Series.
         * Note: only for returning the raw data for the 4 and 6 factor models.
     """
@@ -128,8 +134,8 @@ def _ff_get_mom(frequency) -> pd.Series:
 
 def _get_ff_factors(model: str = "3",
                     frequency: str = "M",
-                    start_date=None,
-                    end_date=None) -> pd.DataFrame:
+                    start_date: Optional[str] = None,
+                    end_date: Optional[str] = None) -> pd.DataFrame:
     """Return the Fama French 3, 5, or 6, or Carhart 4 factor model data.
 
         * Note: This is the function that's called by get_ff_factors in main.
@@ -138,13 +144,16 @@ def _get_ff_factors(model: str = "3",
         frequency = "M"
 
     if frequency.upper() not in ["D", "M", "Y", "W"]:
-        raise ValueError("Frequency must be one of: D, M, Y, or W.")
+        err_msg = "Invalid frequency passed to get_ff_factors: "
+        err_msg += f"   Frequency '{frequency}' not in ff_model `{model}`."
+        raise ValueError(err_msg)
+
     elif model not in ["3", "5", "6", "4"]:
-        raise ValueError(f"Invalid model passed to private function \
-                     _get_ff_factors, must be one of: 3, 5, 6, or 4, \
-                     not {model}. If you see this error message please \
-                     submit an issue at:\
-                         https://github.com/x512/getfactormodels/issues/")
+        err_msg = "Invalid model passed to get_ff_factors, must be one of: "
+        err_msg += "3, 5, 6, or 4, not {model}."
+        err_msg += "If you see this error message please submit an issue at:"
+        err_msg += "    https://github.com/x512/getfactormodels/issues/"
+        raise ValueError(err_msg)
 
     url = _ff_construct_url(model, frequency)
     zip = get_zip_from_url(url)
diff --git a/getfactormodels/models/models.py b/getfactormodels/models/models.py
index effac33..aba760c 100644
--- a/getfactormodels/models/models.py
+++ b/getfactormodels/models/models.py
@@ -25,6 +25,7 @@
 - ``barillas_shanken_factors`` relies on ``hml_devil_factors``, so it's also
     slow.
 """
+from __future__ import annotations
 import datetime
 from io import BytesIO
 from typing import Optional, Union
@@ -35,6 +36,7 @@
 from getfactormodels.utils.utils import _process, get_file_from_url
 from .ff_models import _get_ff_factors
 
+
 # TODO: "PEP 484 prohibits implicit `Optional`" see: RUFF013.
 
 
@@ -226,9 +228,9 @@ def dhs_factors(frequency: str = "M",
     url = base_url + file
 
     response = requests.get(url, verify=True, timeout=20)
-    file = BytesIO(response.content)
+    content = BytesIO(response.content)
 
-    data = pd.read_excel(file, index_col="Date",
+    data = pd.read_excel(content, index_col="Date",
                          usecols=['Date', 'FIN', 'PEAD'], engine='openpyxl',
                          header=0, parse_dates=False)
     data.index.name = "date"
@@ -328,7 +330,7 @@ def carhart_factors(frequency: str = "M",
 cache = cachetools.TTLCache(maxsize=100, ttl=86400)
 
 
-def _download_hml_devil(frequency):
+def _download_hml_devil(frequency: str = 'M') -> pd.DataFrame:
     base_url = 'https://www.aqr.com/-/media/AQR/Documents/Insights/'
     file = 'daily' if frequency.lower() == 'd' else 'monthly'
     url = f'{base_url}/Data-Sets/The-Devil-in-HMLs-Details-Factors-{file}.xlsx'
@@ -364,31 +366,41 @@ def _download_hml_devil(frequency):
     return data
 
 
-def _get_hml_devil(frequency='M',
+# TODO: FIXME: HML Devil isn't using cache in cli. see /utils/cli.py probably. MKT -> Mkt-RF also!! Needs to be fixed before a swap-out hml for hm_devil func!  # noqa: E501
+
+def _get_hml_devil(frequency: str = 'M',
                    start_date: Optional[str] = None,
                    end_date: Optional[str] = None,
                    output: Optional[str] = None,
-                   series=False) -> Union[pd.Series, pd.DataFrame]:
+                   series: bool = False) -> Union[pd.Series, pd.DataFrame]:
 
-    data = _download_hml_devil(frequency)
+    # Use the current date as a cache key
+    current_date = datetime.date.today()
+    cache_key = ('hmld', frequency, None, None, None, None, current_date)
 
-    data.index.name = 'date'
-    data.index = pd.to_datetime(data.index)
+    # Check if the data is in the cache
+    data = cache.get(cache_key)
+    if data is not None:
+        return data
 
-    if frequency.lower() == 'd':
-        data = data.dropna()
+    # If the data is not in the cache, download it
+    data = _download_hml_devil()
 
-    if series:
-        return _process(data, start_date, end_date, filepath=output).HML_DEVIL
+    # Apply transformations to the data
+    data = data.rename(columns={'MKT': 'Mkt-RF', 'HML Devil': 'HML_DEVIL'})
+    data = data.astype(float)
 
-    return _process(data, start_date, end_date, filepath=output)
+    # Store the transformed data in the cache
+    cache[cache_key] = data
+
+    return data
 
 
-def hml_devil_factors(frequency='M',
+def hml_devil_factors(frequency: str = 'M',
                       start_date: Optional[str] = None,
                       end_date: Optional[str] = None,
                       output: Optional[str] = None,
-                      series=False) -> Union[pd.Series, pd.DataFrame]:
+                      series: bool = False) -> Union[pd.Series, pd.DataFrame]:
     """***EXPERIMENTAL***
 
     Retrieve the HML Devil factors from AQR.com. [FIXME: Slow.]
@@ -409,25 +421,12 @@ def hml_devil_factors(frequency='M',
         pd.DataFrame: the HML Devil model data indexed by date.
         pd.Series: the HML factor as a pd.Series
     """
-    # Use the current date as a cache key
-    current_date = datetime.date.today()
-    cache_key = (frequency, None, None, None, None, current_date)
+    data = _get_hml_devil(frequency, start_date, end_date, series=series)
 
-    # If the result is in the cache, return it if not saving
-    if cache_key in cache:
-        result = cache[cache_key]
-        if end_date:
-            end_date = pd.to_datetime(end_date)
-            result = result.loc[result.index <= end_date]
-
-        return _process(result, start_date, end_date, filepath=output)
-
-    # Otherwise, compute the result and store it in the cache
-    data = _get_hml_devil(frequency, start_date, end_date, output, series)
+    data = data.dropna()
 
-    # UMD returns NaNs for 1926
+    data = data.rename(columns={'MKT': 'Mkt-RF'})
     data = data.dropna()
-    cache[cache_key] = data
 
     return _process(data, start_date, end_date, filepath=output)
 
diff --git a/getfactormodels/utils/cli.py b/getfactormodels/utils/cli.py
index bef1654..6846599 100644
--- a/getfactormodels/utils/cli.py
+++ b/getfactormodels/utils/cli.py
@@ -2,7 +2,7 @@
 import argparse
 
 
-def parse_args():
+def parse_args() -> argparse.Namespace:
     """Argument parser, allowing for command line arguments.
     This is the function used in pyproject.toml to run the CLI."""
     parser = argparse.ArgumentParser(
@@ -26,5 +26,5 @@ def parse_args():
     parser.add_argument('--no_rf', '--no-rf', '--norf', action='store_true',
                         help='Drop the RF column from the DataFrame.')
     parser.add_argument('--no_mkt', '--no-mkt', '--nomkt', action='store_true',
-                        help='Drop the MKT column from the DataFrame.')
+                        help='Drop the Mkt-RF column from the DataFrame.')
     return parser.parse_args()
diff --git a/getfactormodels/utils/utils.py b/getfactormodels/utils/utils.py
index 60a78d7..b1bb27a 100644
--- a/getfactormodels/utils/utils.py
+++ b/getfactormodels/utils/utils.py
@@ -156,7 +156,10 @@ def _slice_dates(data, start_date=None, end_date=None):
     return data.loc[slice(start_date, end_date)]
 
 
-def _process(data, start_date=None, end_date=None, filepath=None):
+def _process(data: pd.DataFrame,
+             start_date: str = None,
+             end_date: str = None,
+             filepath: str = None) -> pd.DataFrame:
     """Process the data and optionally save it to a file.
     Note: the `filepath` takes a filename, path or directory.
     """
diff --git a/pyproject.toml b/pyproject.toml
index c2043f0..4c85fd5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -26,8 +26,8 @@ classifiers = [
     "Development Status :: 2 - Pre-Alpha"
 ]
 requires-python = ">=3.7"   # Will lower soon
-dependencies = [ "numpy >=1.18.5",
-                 "pandas >=1.4",
+dependencies = [ "pandas >=1.4",
+                 "numpy >=1.18.5",
                  "requests >=2.20.0",
                  "pyarrow >=14.0.1",
                  "openpyxl >=3.0.3",
@@ -35,8 +35,8 @@ dependencies = [ "numpy >=1.18.5",
                  "cachetools==5.3.2" ]
 
 [project.optional-dependencies]
-dev = ["flit>=3.2,<=3.9", "ruff==0.1.6", "pytest-cov", "pytest>=7.0", "isort",
-       "pytest-randomly", "nox==2023.4.22"]
+dev = ["flit>=3.2,<=3.9", "ruff>=0.1.6", "pytest-cov", "pytest>=7.0",
+       "isort>=5.12", "pytest-randomly", "nox==2023.4.22"]
 
 [project.urls]
 "Homepage" = "https://github.com/x512/getfactormodels"
@@ -56,7 +56,7 @@ lines_between_sections = false
 
 [tool.ruff]
 line-length = 79
-target-version = "py38"
+target-version = "py312"
 indent-width = 4
 respect-gitignore = true
 
@@ -67,21 +67,24 @@ skip-magic-trailing-comma = false
 line-ending = "auto"
 
 # Some rules in preview, enable them:
-# https://docs.astral.sh/ruff/settings/#format-preview
-preview = true  # enabled because of: E241
+## docs.astral.sh/ruff/settings/#format-preview
+preview = true
 
 [tool.ruff.lint]
 exclude = [".git", ".git-rewrite",]
 # run `ruff linter` to see all available rules
 ## see: docs.astral.sh/ruff/rules/
 select = ["E4", "E7", "E9", "F", "B",
-          "DTZ", "W2", "W5", "N",
+          "DTZ", "W2", "W5", "N", "PL",
           "NPY", "SIM", "TID", "PD",
           "E241", "S", "PTH", "RUF", 
-          "FIX001", "FIX002", "TD004",
-          "TD005", "TD007", "E501", "E261"]
+          "FIX001", "TD004", "EM",
+          "TD005", "TD007", "E501",
+          "FA", "FIX002", "ANN",   # fix
+          "E261"]                        # requires '--preview'
         # "TCH" (flake 8 typechecking)
         # "C901" (McCabe complexity)
+        # "CPY" Copyright notices
 
 # Undo soon:
 ignore = ["PD901",]  # TODO: fix all generic variable name `df` for DataFrames
@@ -91,8 +94,8 @@ fixable = ["W29", "W5", "E241", "E261"]
 unfixable = ["B", "FIX001", "FIX002", "UP"]
 
 [tool.ruff.per-file-ignores]
-"main.py" = ["UP007"]
 "**/__init__.py" = ["F401"]
+"__main__.py" = ["F401"]  # the function names are constructed with a key
 
 [tool.coverage.run]
 omit = ['tests/*']

From 1a7a0618178e4200c2701a89185af22448d908e3 Mon Sep 17 00:00:00 2001
From: x512 <x512@pm.me>
Date: Sun, 24 Dec 2023 05:08:29 +1100
Subject: [PATCH 10/17] fix: ``hml_devil`` using a persistent cache

---
 getfactormodels/__main__.py      |  4 +-
 getfactormodels/models/models.py | 90 ++++++++++++++------------------
 getfactormodels/utils/utils.py   |  2 +-
 3 files changed, 42 insertions(+), 54 deletions(-)

diff --git a/getfactormodels/__main__.py b/getfactormodels/__main__.py
index 2378e49..bd0c58e 100755
--- a/getfactormodels/__main__.py
+++ b/getfactormodels/__main__.py
@@ -129,9 +129,9 @@ def get_factors(self) -> pd.DataFrame:
             output=self.output)
 
         if self._no_rf:
-            self.df = self.drop_rf(self.df)
+            self.df = self.drop_rf(self.df.copy())  # create a copy before drop -- use cache.
         if self._no_mkt:
-            self.df = self.drop_mkt(self.df)
+            self.df = self.drop_mkt(self.df.copy())
 
         return self.df
 
diff --git a/getfactormodels/models/models.py b/getfactormodels/models/models.py
index aba760c..4eb50b7 100644
--- a/getfactormodels/models/models.py
+++ b/getfactormodels/models/models.py
@@ -29,7 +29,8 @@
 import datetime
 from io import BytesIO
 from typing import Optional, Union
-import cachetools
+import diskcache as dc
+import os
 import numpy as np
 import pandas as pd
 import requests
@@ -326,19 +327,20 @@ def carhart_factors(frequency: str = "M",
 
 # =========================== EXPERIMENTAL ================================== #
 
-# Create a cache with a TTL (time-to-live) of one day
-cache = cachetools.TTLCache(maxsize=100, ttl=86400)
 
+cache_dir = os.path.expanduser('~/.cache/getfactormodels/aqr/hml_devil')
+os.makedirs(cache_dir, exist_ok=True)
+cache = dc.Cache(cache_dir)
 
-def _download_hml_devil(frequency: str = 'M') -> pd.DataFrame:
-    base_url = 'https://www.aqr.com/-/media/AQR/Documents/Insights/'
-    file = 'daily' if frequency.lower() == 'd' else 'monthly'
-    url = f'{base_url}/Data-Sets/The-Devil-in-HMLs-Details-Factors-{file}.xlsx'
-
-    print('Downloading HML Devil factors from AQR... This can take a while. Please be patient or something.')  # noqa: E501
+def _aqr_download_data(url: str) -> pd.DataFrame:
+    """Download the data from the given URL."""
+    print('Downloading data... This can take a while. Please be patient.')
     response = requests.get(url, verify=True, timeout=180)
     xls = pd.ExcelFile(BytesIO(response.content))
+    return xls
 
+def _aqr_process_data(xls: pd.ExcelFile) -> pd.DataFrame:
+    """Process the downloaded data."""
     sheets = {0: 'HML Devil', 4: 'MKT', 5: 'SMB', 7: 'UMD', 8: 'RF'}
     dfs = []
 
@@ -351,53 +353,21 @@ def _download_hml_devil(frequency: str = 'M') -> pd.DataFrame:
 
     for sheet_index, sheet_name in sheets.items():
         df = df_dict[sheet_name]
-
         df = df[['USA']] if sheet_index != 8 else df.iloc[:, 0:1]
-
         df.columns = [sheet_name]
         dfs.append(df)
-
+    # Drop NaNs but only RF UMD
     data = pd.concat(dfs, axis=1)
-    data.rename(columns={'MKT': 'Mkt-RF',
-                         'HML Devil': 'HML_DEVIL'})
-
+    data = data.dropna(subset=['RF', 'UMD'])
+    data.rename(columns={'MKT': 'Mkt-RF', 'HML Devil': 'HML_DEVIL'}, inplace=True)
     data = data.astype(float)
 
-    return data
-
-
-# TODO: FIXME: HML Devil isn't using cache in cli. see /utils/cli.py probably. MKT -> Mkt-RF also!! Needs to be fixed before a swap-out hml for hm_devil func!  # noqa: E501
-
-def _get_hml_devil(frequency: str = 'M',
-                   start_date: Optional[str] = None,
-                   end_date: Optional[str] = None,
-                   output: Optional[str] = None,
-                   series: bool = False) -> Union[pd.Series, pd.DataFrame]:
-
-    # Use the current date as a cache key
-    current_date = datetime.date.today()
-    cache_key = ('hmld', frequency, None, None, None, None, current_date)
-
-    # Check if the data is in the cache
-    data = cache.get(cache_key)
-    if data is not None:
-        return data
-
-    # If the data is not in the cache, download it
-    data = _download_hml_devil()
-
-    # Apply transformations to the data
-    data = data.rename(columns={'MKT': 'Mkt-RF', 'HML Devil': 'HML_DEVIL'})
-    data = data.astype(float)
-
-    # Store the transformed data in the cache
-    cache[cache_key] = data
+    data = data.dropna()
 
     return data
 
 
-def hml_devil_factors(frequency: str = 'M',
-                      start_date: Optional[str] = None,
+def hml_devil_factors(frequency: str = 'M', start_date: Optional[str] = None,
                       end_date: Optional[str] = None,
                       output: Optional[str] = None,
                       series: bool = False) -> Union[pd.Series, pd.DataFrame]:
@@ -421,14 +391,32 @@ def hml_devil_factors(frequency: str = 'M',
         pd.DataFrame: the HML Devil model data indexed by date.
         pd.Series: the HML factor as a pd.Series
     """
-    data = _get_hml_devil(frequency, start_date, end_date, series=series)
 
-    data = data.dropna()
+    base_url = 'https://www.aqr.com/-/media/AQR/Documents/Insights/'
+    file = 'daily' if frequency.lower() == 'd' else 'monthly'
+    url = f'{base_url}/Data-Sets/The-Devil-in-HMLs-Details-Factors-{file}.xlsx'
 
-    data = data.rename(columns={'MKT': 'Mkt-RF'})
-    data = data.dropna()
+    # Use the current date as a cache key
+    current_date = datetime.date.today().strftime('%Y-%m-%d')
+    cache_key = ('hmld', frequency, None, None, None, None, current_date)
 
-    return _process(data, start_date, end_date, filepath=output)
+    # Check if the data is in the cache
+    data = cache.get(cache_key, default=None)
+    if data is not None:
+        print("Using cached data")
+        return data
+
+    # If the data is not in the cache, download it
+    print("Not using cache, downloading data")
+    xls = _aqr_download_data(url)
+
+    # Process the downloaded data
+    data = _aqr_process_data(xls)
+
+    # Store the processed data in the cache
+    cache.set(cache_key, data, expire=86400)  # TTL is set here
+
+    return data
 
 
 def barillas_shanken_factors(frequency: str = 'M',
diff --git a/getfactormodels/utils/utils.py b/getfactormodels/utils/utils.py
index b1bb27a..d52396b 100644
--- a/getfactormodels/utils/utils.py
+++ b/getfactormodels/utils/utils.py
@@ -20,7 +20,7 @@
     "liquidity": r"^(il)?liq(uidity)?|(pastor|ps|sp)$",
     "icr": r"\bicr|hkm\b",
     "dhs": r"^(\bdhs\b|behav.*)$",
-    "hml_devil": r"\bhml(_)?d(evil)?\b",
+    "hml_devil": r"\bhml(_)?d(evil)?|hmld\b",
     "barillas_shanken": r"\b(bs|bs6|barillas|shanken)\b", })
 
 

From afd752f7ddcf7a6507928c196648bb902584731d Mon Sep 17 00:00:00 2001
From: x512 <x512@pm.me>
Date: Sun, 24 Dec 2023 05:12:42 +1100
Subject: [PATCH 11/17] fix: ``hml_devil_factors()`` using persistent cache
 (``diskcache``)

---
 getfactormodels/__main__.py      | 9 +++++----
 getfactormodels/models/models.py | 3 +--
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/getfactormodels/__main__.py b/getfactormodels/__main__.py
index bd0c58e..d964f9f 100755
--- a/getfactormodels/__main__.py
+++ b/getfactormodels/__main__.py
@@ -1,19 +1,20 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 from pathlib import Path
+from typing import Optional
 import pandas as pd
 from dateutil import parser
 # ruff: noqa: RUF100
 from getfactormodels.models.models import \
     barillas_shanken_factors  # noqa: F401
-from getfactormodels.models.models import (carhart_factors, dhs_factors,  # noqa: F401, E501
-                                           ff_factors, hml_devil_factors,
-                                           icr_factors, liquidity_factors,
+from getfactormodels.models.models import (carhart_factors,  # noqa: F401, E501
+                                           dhs_factors, ff_factors,
+                                           hml_devil_factors, icr_factors,
+                                           liquidity_factors,
                                            mispricing_factors,
                                            q_classic_factors, q_factors)
 from getfactormodels.utils.cli import parse_args
 from getfactormodels.utils.utils import _get_model_key, _process
-from typing import Optional
 
 
 def get_factors(model: str = "3",
diff --git a/getfactormodels/models/models.py b/getfactormodels/models/models.py
index 4eb50b7..d09159b 100644
--- a/getfactormodels/models/models.py
+++ b/getfactormodels/models/models.py
@@ -27,17 +27,16 @@
 """
 from __future__ import annotations
 import datetime
+import os
 from io import BytesIO
 from typing import Optional, Union
 import diskcache as dc
-import os
 import numpy as np
 import pandas as pd
 import requests
 from getfactormodels.utils.utils import _process, get_file_from_url
 from .ff_models import _get_ff_factors
 
-
 # TODO: "PEP 484 prohibits implicit `Optional`" see: RUFF013.
 
 

From b075b741e23cb5822c9ac243673809700fedc048 Mon Sep 17 00:00:00 2001
From: x512 <x512@pm.me>
Date: Sun, 24 Dec 2023 05:45:41 +1100
Subject: [PATCH 12/17] fix: cli `--model` with ``hml_devil`` now using cache

---
 getfactormodels/__main__.py      | 13 +++----
 getfactormodels/models/models.py | 62 +++++++++++++++++---------------
 2 files changed, 40 insertions(+), 35 deletions(-)

diff --git a/getfactormodels/__main__.py b/getfactormodels/__main__.py
index d964f9f..6660592 100755
--- a/getfactormodels/__main__.py
+++ b/getfactormodels/__main__.py
@@ -7,8 +7,8 @@
 # ruff: noqa: RUF100
 from getfactormodels.models.models import \
     barillas_shanken_factors  # noqa: F401
-from getfactormodels.models.models import (carhart_factors,  # noqa: F401, E501
-                                           dhs_factors, ff_factors,
+from getfactormodels.models.models import carhart_factors  # noqa: F401, E501
+from getfactormodels.models.models import (dhs_factors, ff_factors,
                                            hml_devil_factors, icr_factors,
                                            liquidity_factors,
                                            mispricing_factors,
@@ -99,11 +99,11 @@ def __init__(self,
         self._no_mkt = False
         self.df = None
 
-    def no_rf(self):
+    def no_rf(self) -> None:
         """Sets the _no_rf flag to True."""
         self._no_rf = True
 
-    def no_mkt(self):
+    def no_mkt(self) -> None:
         """Sets the _no_mkt flag to True."""
         self._no_mkt = True
 
@@ -118,7 +118,8 @@ def validate_date_format(date_string: str) -> str:
         try:
             return parser.parse(date_string).strftime("%Y-%m-%d")
         except ValueError as err:
-            raise ValueError("Incorrect date format, use YYYY-MM-DD.") from err
+            error_message = "Incorrect date format, use YYYY-MM-DD."
+            raise ValueError(error_message) from err
 
     def get_factors(self) -> pd.DataFrame:
         """Fetch the factor data and store it in the class."""
@@ -130,7 +131,7 @@ def get_factors(self) -> pd.DataFrame:
             output=self.output)
 
         if self._no_rf:
-            self.df = self.drop_rf(self.df.copy())  # create a copy before drop -- use cache.
+            self.df = self.drop_rf(self.df.copy())  # create a copy before drop
         if self._no_mkt:
             self.df = self.drop_mkt(self.df.copy())
 
diff --git a/getfactormodels/models/models.py b/getfactormodels/models/models.py
index d09159b..8ec5479 100644
--- a/getfactormodels/models/models.py
+++ b/getfactormodels/models/models.py
@@ -27,8 +27,8 @@
 """
 from __future__ import annotations
 import datetime
-import os
 from io import BytesIO
+from pathlib import Path
 from typing import Optional, Union
 import diskcache as dc
 import numpy as np
@@ -37,8 +37,6 @@
 from getfactormodels.utils.utils import _process, get_file_from_url
 from .ff_models import _get_ff_factors
 
-# TODO: "PEP 484 prohibits implicit `Optional`" see: RUFF013.
-
 
 def ff_factors(model: str = "3",
                frequency: str = "M",
@@ -86,8 +84,9 @@ def liquidity_factors(frequency: str = "M",
     url += '-/media/research/famamiller/data/liq_data_1962_2022.txt'
 
     if frequency.lower() != 'm':
+        err_msg = "Frequency must be 'm'."
         print('Liquidity factors are only available for monthly frequency.')
-        raise ValueError("Frequency must be 'm'.")
+        raise ValueError(err_msg)
 
     # Get .csv here...
     data = get_file_from_url(url)
@@ -126,9 +125,9 @@ def mispricing_factors(frequency: str = "M",
                        output: Optional[str] = None) -> pd.DataFrame:
     """Retrieve the Stambaugh-Yuan mispricing factors. Daily and monthly."""
     if frequency.lower() not in ["d", "m"]:
-        print("Mispricing factors are only available for daily and monthly \
-            frequency.")
-        raise ValueError("Frequency must be 'd' or 'm'.")
+        error_msg = "Mispricing factors are only available for daily and\
+                     monthly frequency."
+        raise ValueError(error_msg)
         return None
 
     file = "M4d" if frequency == "d" else "M4"
@@ -216,16 +215,16 @@ def dhs_factors(frequency: str = "M",
     frequency = frequency.lower()
     base_url = "https://docs.google.com/spreadsheets/d/"
 
-    if frequency.lower() == "m":
-        file = "1RxYLbCfk19m8fnniiJYfaj3yI55ZPaoi/export?format=xlsx"
-    elif frequency.lower() == "d":
-        file = "1KnCP-NVhf2Sni8bVFIVyMxW-vIljBOWE/export?format=xlsx"
+    if frequency == "m":
+        sheet = "1RxYLbCfk19m8fnniiJYfaj3yI55ZPaoi/export?format=xlsx"
+    elif frequency == "d":
+        sheet = "1KnCP-NVhf2Sni8bVFIVyMxW-vIljBOWE/export?format=xlsx"
     else:
-        print("Frequency must be either 'M' (monthly) or 'D' (daily).")
-        raise ValueError("Frequency must be 'M' or 'D'.")
-    # TODO: use the link to the Google Sheet instead of the actual sheet.
+        error_message = "Frequency must be 'm' or 'd' for the DHHS factors'."
+        print(error_message)
+        raise ValueError(error_message)
 
-    url = base_url + file
+    url = base_url + sheet
 
     response = requests.get(url, verify=True, timeout=20)
     content = BytesIO(response.content)
@@ -262,11 +261,12 @@ def icr_factors(frequency: str = "M",
     """Retrieve the He, Kelly, Manela (2017) ICR factors.
     * Daily since 1999-05-03; quarterly and monthly since 1970.
     """
-    # TODO: Do we need Mkt-RF and RF [seen reffered to as 2-factor model]?
+    # TODO: Do we need Mkt-RF and RF [seen referred to as a 2-factor model]? Note: liq doesn't have Mkt-RF or RF either. # noqa
     frequency = frequency.lower()
 
     if frequency not in ["d", "m", "q"]:
-        raise ValueError("Frequency must be 'd', 'm' or 'q'.")
+        err_msg = "Frequency must be 'd', 'm' or 'q'."
+        raise ValueError(err_msg)
 
     base_url = "https://voices.uchicago.edu/zhiguohe"
     file = {"d": "daily", "m": "monthly", "q": "quarterly"}.get(frequency)
@@ -327,10 +327,11 @@ def carhart_factors(frequency: str = "M",
 # =========================== EXPERIMENTAL ================================== #
 
 
-cache_dir = os.path.expanduser('~/.cache/getfactormodels/aqr/hml_devil')
-os.makedirs(cache_dir, exist_ok=True)
+cache_dir = Path('~/.cache/getfactormodels/aqr/hml_devil').expanduser()
+cache_dir.mkdir(parents=True, exist_ok=True)
 cache = dc.Cache(cache_dir)
 
+
 def _aqr_download_data(url: str) -> pd.DataFrame:
     """Download the data from the given URL."""
     print('Downloading data... This can take a while. Please be patient.')
@@ -338,6 +339,7 @@ def _aqr_download_data(url: str) -> pd.DataFrame:
     xls = pd.ExcelFile(BytesIO(response.content))
     return xls
 
+
 def _aqr_process_data(xls: pd.ExcelFile) -> pd.DataFrame:
     """Process the downloaded data."""
     sheets = {0: 'HML Devil', 4: 'MKT', 5: 'SMB', 7: 'UMD', 8: 'RF'}
@@ -352,16 +354,17 @@ def _aqr_process_data(xls: pd.ExcelFile) -> pd.DataFrame:
 
     for sheet_index, sheet_name in sheets.items():
         df = df_dict[sheet_name]
-        df = df[['USA']] if sheet_index != 8 else df.iloc[:, 0:1]
+        df = df[['USA']] if sheet_index != 8 else df.iloc[:, 0:1]  # noqa
         df.columns = [sheet_name]
         dfs.append(df)
-    # Drop NaNs but only RF UMD
+
     data = pd.concat(dfs, axis=1)
+
     data = data.dropna(subset=['RF', 'UMD'])
-    data.rename(columns={'MKT': 'Mkt-RF', 'HML Devil': 'HML_DEVIL'}, inplace=True)
-    data = data.astype(float)
 
-    data = data.dropna()
+    data.rename(columns={'MKT': 'Mkt-RF', 'HML Devil': 'HML_DEVIL'})
+
+    data = data.astype(float)
 
     return data
 
@@ -395,13 +398,14 @@ def hml_devil_factors(frequency: str = 'M', start_date: Optional[str] = None,
     file = 'daily' if frequency.lower() == 'd' else 'monthly'
     url = f'{base_url}/Data-Sets/The-Devil-in-HMLs-Details-Factors-{file}.xlsx'
 
-    # Use the current date as a cache key
+    # Use the current date and end date as a cache key
     current_date = datetime.date.today().strftime('%Y-%m-%d')
-    cache_key = ('hmld', frequency, None, None, None, None, current_date)
+    cache_key = ('hmld', frequency, None, None, None, None, current_date,
+                 end_date)
 
     # Check if the data is in the cache
-    data = cache.get(cache_key, default=None)
-    if data is not None:
+    data, cached_end_date = cache.get(cache_key, default=(None, None))
+    if data is not None and (end_date is None or end_date <= cached_end_date):
         print("Using cached data")
         return data
 
@@ -413,7 +417,7 @@ def hml_devil_factors(frequency: str = 'M', start_date: Optional[str] = None,
     data = _aqr_process_data(xls)
 
     # Store the processed data in the cache
-    cache.set(cache_key, data, expire=86400)  # TTL is set here
+    cache[cache_key] = (data, end_date)  # TTL is set here
 
     return data
 

From 08d8fd3267e190e1345bbf7ef54993e4e8d5fd04 Mon Sep 17 00:00:00 2001
From: x512 <x512@pm.me>
Date: Sun, 24 Dec 2023 05:54:14 +1100
Subject: [PATCH 13/17] fix: ``hml_devil`` monthly TypeError

---
 getfactormodels/models/models.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/getfactormodels/models/models.py b/getfactormodels/models/models.py
index 8ec5479..c4e8b4b 100644
--- a/getfactormodels/models/models.py
+++ b/getfactormodels/models/models.py
@@ -362,7 +362,7 @@ def _aqr_process_data(xls: pd.ExcelFile) -> pd.DataFrame:
 
     data = data.dropna(subset=['RF', 'UMD'])
 
-    data.rename(columns={'MKT': 'Mkt-RF', 'HML Devil': 'HML_DEVIL'})
+    data.rename(columns={'MKT': 'Mkt-RF', 'HML Devil': 'HML_Devil'})
 
     data = data.astype(float)
 
@@ -449,7 +449,7 @@ def barillas_shanken_factors(frequency: str = 'M',
     hml_devil = hml_devil_factors(frequency=frequency, start_date=start_date,
                                   series=True)
 
-    hml_devil = hml_devil.rename('HML_m')
+    hml_devil = hml_devil.rename(columns={'HML_Devil': 'HML_m'})
     hml_devil.index.name = 'date'
 
     df = df.merge(hml_devil, left_index=True,

From a57134c83fbdfea0cb2b2ab20958500b296f6f73 Mon Sep 17 00:00:00 2001
From: x512 <x512@pm.me>
Date: Sun, 24 Dec 2023 06:34:20 +1100
Subject: [PATCH 14/17] fix: typo (series not df for bs); confirm
 ``hml_devil_factors`` using cache

---
 getfactormodels/models/models.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/getfactormodels/models/models.py b/getfactormodels/models/models.py
index c4e8b4b..ae4b689 100644
--- a/getfactormodels/models/models.py
+++ b/getfactormodels/models/models.py
@@ -447,12 +447,11 @@ def barillas_shanken_factors(frequency: str = 'M',
     df = q.merge(ff, left_index=True, right_index=True, how='inner')
 
     hml_devil = hml_devil_factors(frequency=frequency, start_date=start_date,
-                                  series=True)
-
-    hml_devil = hml_devil.rename(columns={'HML_Devil': 'HML_m'})
+                                  series=True)[['HML Devil']]
+    
     hml_devil.index.name = 'date'
 
-    df = df.merge(hml_devil, left_index=True,
-                  right_index=True, how='inner')
+    hml_devil = hml_devil.rename(columns={'HML Devil': 'HML_m'})
+    df = df.merge(hml_devil, left_index=True, right_index=True, how='inner')
 
     return _process(df, start_date, end_date, filepath=output)

From a060c0a80249b926f7473547f384a9a5358bd4d3 Mon Sep 17 00:00:00 2001
From: x512 <x512@pm.me>
Date: Sun, 24 Dec 2023 06:47:35 +1100
Subject: [PATCH 15/17] prepare version 0.0.4

---
 README.md                          | 2 +-
 getfactormodels/__init__.py        | 2 +-
 getfactormodels/models/__init__.py | 4 ++--
 pyproject.toml                     | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 8d76048..9379e9c 100644
--- a/README.md
+++ b/README.md
@@ -38,7 +38,7 @@ _Thanks to: Kenneth French, Robert Stambaugh, Lin Sun, Zhiguo He, AQR Capital Ma
 
 >[!IMPORTANT]
 >![PyPI - Status](https://img.shields.io/pypi/status/getfactormodels?style=flat-square)
-
+>
 >``getfactormodels`` is new. It was released on December 20, 2023. Don't rely on it for anything.
 
 After installation, import and call the ``get_factors()`` function with the ``model`` and ``frequency`` params:
diff --git a/getfactormodels/__init__.py b/getfactormodels/__init__.py
index f7d785e..a480868 100644
--- a/getfactormodels/__init__.py
+++ b/getfactormodels/__init__.py
@@ -20,7 +20,7 @@
 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
-__version__ = "0.0.3"
+__version__ = "0.0.4"
 
 from .__main__ import FactorExtractor, get_factors
 from .models import models  # noqa: F401, RUF100 (silent flake8 in VScode)
diff --git a/getfactormodels/models/__init__.py b/getfactormodels/models/__init__.py
index 9be4f54..f314af1 100644
--- a/getfactormodels/models/__init__.py
+++ b/getfactormodels/models/__init__.py
@@ -20,5 +20,5 @@
 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
-from . import ff_models  # noqa: F401 - TODO: disable 401 in all __init__
-from . import models  # noqa: F401
+from . import ff_models
+from . import models
diff --git a/pyproject.toml b/pyproject.toml
index 4c85fd5..5c6ed10 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -2,7 +2,7 @@
 name = "getfactormodels"
 dynamic = ["version"]
 description = "Retrieve data for various multifactor asset pricing models."
-authors = [{name = "S. Martin", email = "x512@pm.me"}]
+authors = [{name = "S. Martin", email = "x512@pm.me"}, ]
 license = {file = "LICENSE"}
 readme = "README.md"
 keywords = ['finance', 'pricing models', 'financial analysis', 'econometrics',

From bb8cbcd27339691cb0b639e2f85c0ce05d25b18a Mon Sep 17 00:00:00 2001
From: x512 <x512@pm.me>
Date: Sun, 24 Dec 2023 07:05:15 +1100
Subject: [PATCH 16/17] prepared: v 0.0.4

---
 getfactormodels/models/__init__.py | 3 +--
 getfactormodels/models/models.py   | 7 ++++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/getfactormodels/models/__init__.py b/getfactormodels/models/__init__.py
index f314af1..e12f70d 100644
--- a/getfactormodels/models/__init__.py
+++ b/getfactormodels/models/__init__.py
@@ -20,5 +20,4 @@
 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
-from . import ff_models
-from . import models
+from . import ff_models, models
diff --git a/getfactormodels/models/models.py b/getfactormodels/models/models.py
index ae4b689..a093b83 100644
--- a/getfactormodels/models/models.py
+++ b/getfactormodels/models/models.py
@@ -362,7 +362,8 @@ def _aqr_process_data(xls: pd.ExcelFile) -> pd.DataFrame:
 
     data = data.dropna(subset=['RF', 'UMD'])
 
-    data.rename(columns={'MKT': 'Mkt-RF', 'HML Devil': 'HML_Devil'})
+    data.rename(columns={'MKT': 'Mkt-RF', 'HML Devil': 'HML_Devil'},
+                inplace=True)
 
     data = data.astype(float)
 
@@ -447,11 +448,11 @@ def barillas_shanken_factors(frequency: str = 'M',
     df = q.merge(ff, left_index=True, right_index=True, how='inner')
 
     hml_devil = hml_devil_factors(frequency=frequency, start_date=start_date,
-                                  series=True)[['HML Devil']]
+                                  series=True)[['HML _evil']]
     
     hml_devil.index.name = 'date'
 
-    hml_devil = hml_devil.rename(columns={'HML Devil': 'HML_m'})
+    hml_devil = hml_devil.rename(columns={'HML_Devil': 'HML_m'})
     df = df.merge(hml_devil, left_index=True, right_index=True, how='inner')
 
     return _process(df, start_date, end_date, filepath=output)

From 4202c449bfbfad0c8985fa8ba2ca850d2b4eb36b Mon Sep 17 00:00:00 2001
From: x512 <x512@pm.me>
Date: Sun, 24 Dec 2023 07:18:50 +1100
Subject: [PATCH 17/17] prepared v 0.0.4 now

---
 getfactormodels/models/models.py | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/getfactormodels/models/models.py b/getfactormodels/models/models.py
index a093b83..d3c364d 100644
--- a/getfactormodels/models/models.py
+++ b/getfactormodels/models/models.py
@@ -220,7 +220,7 @@ def dhs_factors(frequency: str = "M",
     elif frequency == "d":
         sheet = "1KnCP-NVhf2Sni8bVFIVyMxW-vIljBOWE/export?format=xlsx"
     else:
-        error_message = "Frequency must be 'm' or 'd' for the DHHS factors'."
+        error_message = "Frequency must be 'm' or 'd' for the DHS factors'."
         print(error_message)
         raise ValueError(error_message)
 
@@ -362,9 +362,6 @@ def _aqr_process_data(xls: pd.ExcelFile) -> pd.DataFrame:
 
     data = data.dropna(subset=['RF', 'UMD'])
 
-    data.rename(columns={'MKT': 'Mkt-RF', 'HML Devil': 'HML_Devil'},
-                inplace=True)
-
     data = data.astype(float)
 
     return data
@@ -394,12 +391,10 @@ def hml_devil_factors(frequency: str = 'M', start_date: Optional[str] = None,
         pd.DataFrame: the HML Devil model data indexed by date.
         pd.Series: the HML factor as a pd.Series
     """
-
     base_url = 'https://www.aqr.com/-/media/AQR/Documents/Insights/'
     file = 'daily' if frequency.lower() == 'd' else 'monthly'
     url = f'{base_url}/Data-Sets/The-Devil-in-HMLs-Details-Factors-{file}.xlsx'
 
-    # Use the current date and end date as a cache key
     current_date = datetime.date.today().strftime('%Y-%m-%d')
     cache_key = ('hmld', frequency, None, None, None, None, current_date,
                  end_date)
@@ -407,15 +402,15 @@ def hml_devil_factors(frequency: str = 'M', start_date: Optional[str] = None,
     # Check if the data is in the cache
     data, cached_end_date = cache.get(cache_key, default=(None, None))
     if data is not None and (end_date is None or end_date <= cached_end_date):
-        print("Using cached data")
+        # Reuse the cached data: no end date given, or not past the cached one
         return data
 
-    # If the data is not in the cache, download it
-    print("Not using cache, downloading data")
     xls = _aqr_download_data(url)
 
     # Process the downloaded data
     data = _aqr_process_data(xls)
+    data.rename(columns={'MKT': 'Mkt-RF', 'HML Devil': 'HML_Devil'},
+                inplace=True)
 
     # Store the processed data in the cache
     cache[cache_key] = (data, end_date)  # TTL is set here
@@ -448,11 +443,11 @@ def barillas_shanken_factors(frequency: str = 'M',
     df = q.merge(ff, left_index=True, right_index=True, how='inner')
 
     hml_devil = hml_devil_factors(frequency=frequency, start_date=start_date,
-                                  series=True)[['HML _evil']]
+                                  series=True)[['HML Devil']]
     
     hml_devil.index.name = 'date'
 
-    hml_devil = hml_devil.rename(columns={'HML_Devil': 'HML_m'})
+    hml_devil = hml_devil.rename(columns={'HML Devil': 'HML_m'})
     df = df.merge(hml_devil, left_index=True, right_index=True, how='inner')
 
     return _process(df, start_date, end_date, filepath=output)