def create_blp_instruments(X, mkts_firms_prods, include_ones=False, include_arguments=True):
    """Build sum-of-characteristics instruments from product characteristics.

    For every row ``i`` and every column ``c`` of ``X`` two instruments are
    computed:

    * own-firm instrument: the sum of ``c`` over the rows that share row
      ``i``'s market and firm but carry a different product id;
    * rival instrument: the sum of ``c`` over the rows that share row
      ``i``'s market but belong to a different firm.

    Parameters
    ----------
    X : array-like, shape (n, k)
        Characteristics, one row per observation. Must be two-dimensional.
    mkts_firms_prods : array-like, shape (n, 3)
        Market id, firm id and product id of each row, in that column order.
        Ids are assumed to be non-missing.
    include_ones : bool, default False
        If True, a column of ones is prepended to ``X`` before the
        instruments are computed, so the first instrument of each family
        counts the matching rows.
    include_arguments : bool, default True
        If True, the returned matrix starts with the columns of ``X``
        (including the column of ones when ``include_ones`` is set).

    Returns
    -------
    numpy.ndarray
        Shape ``(n, 2 * k')`` when ``include_arguments`` is False and
        ``(n, 3 * k')`` otherwise, where ``k'`` is the number of columns of
        ``X`` after the optional column of ones was added. The instrument
        columns are ordered: all own-firm sums first, then all rival sums.

    Notes
    -----
    The sums are obtained as differences of grouped totals instead of
    rebuilding a boolean mask for every row, which turns a quadratic scan
    into a handful of group-by passes. Results agree with the row-by-row
    definition up to floating-point rounding.
    """
    # Imported locally so the function does not rely on a module-level
    # ``pd`` name, which the visible part of this module does not define.
    import pandas as pd

    X = np.asarray(X)
    if include_ones:
        X = np.block([[np.ones((X.shape[0], 1)), X]])

    names = [str(j) for j in range(X.shape[1])]
    id_names = ["mkt", "firm", "prod"]
    df = pd.concat(
        [
            pd.DataFrame(X, columns=names),
            pd.DataFrame(np.asarray(mkts_firms_prods), columns=id_names),
        ],
        axis=1,
    )

    def _group_totals(keys):
        # Total of every characteristic within each group, broadcast back
        # to the rows of that group.
        return df.groupby(keys, sort=False)[names].transform("sum").to_numpy()

    market_totals = _group_totals(["mkt"])
    firm_totals = _group_totals(["mkt", "firm"])
    product_totals = _group_totals(["mkt", "firm", "prod"])

    # Same market and firm, different product id: remove every row that
    # shares the product id (not only the row itself) from the firm total.
    own_firm_other_products = firm_totals - product_totals
    # Same market, different firm.
    rival_firms = market_totals - firm_totals

    instruments = np.hstack([own_firm_other_products, rival_firms])
    if include_arguments:
        return np.block([[X, instruments]])
    return instruments
load_blp_data(pyblp_compatibility=True): agent_data['market_ids']=agent_data['market_ids'].astype(int) # check consistency with pyblp - if pyblp_compatibility: + if check_pyblp_compatibility: import pyblp # check if product data coincide product_data = pd.read_csv(pyblp.data.BLP_PRODUCTS_LOCATION) diff --git a/setup.py b/setup.py index 0d90dc5..ffe2cde 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ setup( name="mec", - version="0.172", + version="0.173", authors=["Alfred Galichon"], author_email="ag133@nyu.edu", licence="",