Source code for skysurvey.tools.snana
"""
This module provides utilities to help `SNANA` users.
"""
import numpy as np
import pandas
import warnings
[docs]
def parse_simlib(simlib):
""" Parse a single snana simlib file.
Parameters
----------
simlib: str
Path to the simlib file.
Returns
-------
data: `pandas.DataFrame`
Concatenated DataFrame of all blocks.
metadata: `pandas.DataFrame`
DataFrame of metadata for each block.
"""
file_ = open(simlib, "r").read().splitlines()
i_start = [ i for i, f_ in enumerate(file_) if f_.startswith("BEGIN LIBGEN") ]
i_end = [ i for i, f_ in enumerate(file_) if f_.startswith("END_LIBID") ]
if len(i_start) != 1:
raise ValueError("Exactly 1 'BEGIN LIBGEN' is expected, {len(i_start)} found.")
dfs = []
metas = []
blocks = i_start+i_end
for block_range in zip(blocks[:-1], blocks[1:]):
block = file_[block_range[0]:block_range[1]]
df, meta = parse_simlib_block(block)
dfs.append(df)
metas.append(meta)
data = pandas.concat(dfs, keys=np.arange(len(dfs)))
metadata = pandas.concat(metas, keys=np.arange(len(dfs)), axis=1).T
return data, metadata
[docs]
def parse_simlib_block(block):
""" Parse a single snana simlib block.
Parameters
----------
block: list of str
Lines corresponding to a single block. If no 'READ' line or multiple 'READ' line, will raise a ValueError.
Returns
-------
dataframe: `pandas.DataFrame`
meta: `pandas.Series` or None
Returns None if metadata parsing fails.
"""
read_start = [ i for i, f_ in enumerate(block) if " READ " in f_]
if len(read_start) == 0:
raise ValueError("cannot parse input block. No 'READ' line found")
if len(read_start) > 1:
raise ValueError(f"cannot parse input block. Multiple 'READ' lines found {read_start}")
# ok this is the line with READ on it.
read_start = read_start[0]
# columns
columns = [block_strip.lower() for block_ in block[read_start+1].replace("#", "").split()
if len(block_strip:=block_.strip())>1]
# data
data_block = block[read_start+2:]
data = []
for block_line in data_block:
try:
data_, comments = block_line.split("#")
except Exception as e:
warnings.warn(e)
print(block_line)
return
case, data_ = data_.split(":")
data_ = data_.split()
data.append([case]+data_+[comments.strip()])
dataframe = pandas.DataFrame(data, columns=["case"]+columns+["comments"])
# metadata
try:
meta_block = block[:read_start]
meta = " ".join([meta_.split("#")[0] for meta_ in meta_block
if not meta_.startswith("#") and len(meta_)>0
and "LIBGEN" not in meta_]
).replace(": ", ":").split()
meta = pandas.Series({k.lower():v for k,v in [meta_.split(":") for meta_ in meta]})
except Exception as e:
warnings.warn(e)
print(f"failed meta for {meta_block}")
meta= None
return dataframe, meta
### DES ####
[docs]
def parse_simlib_des(simlib):
""" Parse a single snana simlib file adapted for a DES simlib file.
Parameters
----------
simlib: str
Path to the simlib file.
Returns
-------
data: `pandas.DataFrame`
Concatenated DataFrame of all blocks.
metadata: `pandas.DataFrame`
DataFrame of metadata for each block.
"""
file_ = open(simlib, "r").read().splitlines()
i_start = [ i for i, f_ in enumerate(file_) if f_.startswith("BEGIN LIBGEN") ]
i_end = [ i for i, f_ in enumerate(file_) if f_.startswith("END_LIBID") ]
if len(i_start) != 1:
raise ValueError("Exactly 1 'BEGIN LIBGEN' is expected, {len(i_start)} found.")
dfs = []
metas = []
blocks = i_start+i_end
for block_range in zip(blocks[:-1], blocks[1:]):
block = file_[block_range[0]:block_range[1]]
df, meta = parse_simlib_block(block)
dfs.append(df)
metas.append(meta)
data = pandas.concat(dfs, keys=np.arange(len(dfs)))
metadata = pandas.concat(metas, keys=np.arange(len(dfs)), axis=1).T
return data, metadata
[docs]
def parse_simlib_block_des(block):
""" Parse a single snana simlib block, adapted for DES simlib blocks.
Parameters
----------
block: list of str
Lines corresponding to a single block.
Returns
-------
dataframe: `pandas.DataFrame`
meta: `pandas.Series` or None
Returns None if metadata parsing fails.
"""
read_start = [ i for i, f_ in enumerate(block) if " READ " in f_]
if len(read_start) == 0:
raise ValueError("cannot parse input block. No 'READ' line found")
if len(read_start) > 1:
raise ValueError(f"cannot parse input block. multiple 'READ' lines found {read_start}")
# ok this is the line with READ on it.
read_start = read_start[0]
# columns
columns = [l_strip.lower() for line_ in block[read_start+1].replace("#", "").split()
if len(l_strip := line_.strip())>1]
# data
data_block = block[read_start+2:]
data = []
for block_line in data_block:
try:
data_, comments = block_line.split("#")
except Exception as e:
warnings.warn(e)
print(block_line)
return
case, data_ = data_.split(":")
data_ = data_.split()
data.append([case]+data_+[comments.strip()])
dataframe = pandas.DataFrame(data, columns=["case"]+columns+["comments"])
# metadata
try:
meta_block = block[:read_start]
meta = " ".join([block_.split("#")[0] for block_ in meta_block
if not block_.startswith("#") and len(block_)>0
and "LIBGEN" not in block_]
).replace(": ", ":").split()
meta = pandas.Series({k.lower():v for k,v in [meta_.split(":") for meta_ in meta]})
except Exception as e:
warnings.warn(e)
print(f"failed meta for {meta_block}")
meta= None
return dataframe, meta