"""
Data structures for point-source strong lens observations.
Point-source lensing arises when the background source is compact enough to be treated
as a point (e.g. a quasar, supernova, or compact radio source). Gravitational lensing
splits the source into multiple images whose positions, fluxes, and time delays constrain
the lens mass distribution.
``PointDataset`` holds the image-plane positions, fluxes, and time delays of a single
named point source together with their noise maps. The ``name`` attribute is used to
pair this dataset with the corresponding ``Point`` model component during fitting.
When multiple point sources are fitted simultaneously (for example many multiply-imaged
background sources in a strong-lens cluster) they are collected in a plain Python
``list`` of ``PointDataset`` objects.
Two I/O surfaces are supported:
- JSON (via :func:`autoconf.output_to_json` / :func:`autoconf.from_json`) — exact
round-trip, one file per ``PointDataset``; the canonical modeling input.
- CSV (via :meth:`PointDataset.to_csv` / :meth:`PointDataset.from_csv` and the
module-level :func:`output_to_csv` / :func:`list_from_csv`) — one row per observed
image, grouped by ``name``, so that tens or hundreds of cluster-scale point sources
can be edited in a single spreadsheet.
"""
from autoconf import csvable
from typing import List, Tuple, Optional, Union
import autoarray as aa
# CSV column names, in the order they are written. The base columns are always
# present; the remaining groups are optional and only appear when used.
_BASE_HEADERS = [
    "name",
    "y",
    "x",
    "positions_noise",
]
# Per-image flux measurement and its noise.
_FLUX_HEADERS = [
    "flux",
    "flux_noise",
]
# Per-image time delay measurement and its noise.
_TIME_DELAY_HEADERS = [
    "time_delay",
    "time_delay_noise",
]
# Per-source redshift, repeated on every row of a ``name`` group.
_REDSHIFT_HEADERS = [
    "redshift",
]
[docs]
class PointDataset:
    """
    The positions, fluxes and time delays of one named point source, with their noise maps.
    """

    def __init__(
        self,
        name: str,
        positions: Union[aa.Grid2DIrregular, List[List], List[Tuple]],
        positions_noise_map: Union[float, aa.ArrayIrregular, List[float]],
        fluxes: Optional[Union[aa.ArrayIrregular, List[float]]] = None,
        fluxes_noise_map: Optional[Union[float, aa.ArrayIrregular, List[float]]] = None,
        time_delays: Optional[Union[aa.ArrayIrregular, List[float]]] = None,
        time_delays_noise_map: Optional[
            Union[float, aa.ArrayIrregular, List[float]]
        ] = None,
        redshift: Optional[float] = None,
    ):
        """
        A collection of the data component that can be used for point-source model-fitting, for example fitting the
        observed positions of a strongly lensed quasar or supernovae or in strong lens cluster modeling, where
        there may be many tens or hundreds of individual source galaxies each of which are modeled as a point source.

        The name of the dataset is required for point-source model-fitting, as it pairs a point-source dataset with
        its corresponding point-source in the model-fit. For example, if a dataset has the name `source_1`, it will
        be paired with the `Point` model-component which has the name `source_1`. If a dataset component is not
        successfully paired with a model-component, an error is raised.

        Parameters
        ----------
        name
            The name of the point source dataset which is paired to a `Point` in the `Model`.
        positions
            The image-plane (y,x) positions of the point-source.
        positions_noise_map
            The noise-value of every (y,x) position, which is typically the pixel-scale of the data. A single
            number is repeated once per position.
        fluxes
            The image-plane flux of each observed point-source of light.
        fluxes_noise_map
            The noise-value of every observed flux, which is typically measured from the pixel values of the pixel
            containing the point source after convolution with the PSF. A single number is repeated once per
            position.
        time_delays
            The time delays of each observed point-source of light in days.
        time_delays_noise_map
            The noise-value of every observed time delay, which is typically measured from the time delay analysis.
            A single number is repeated once per position.
        redshift
            The redshift of the source. Optional; when provided it is carried through CSV round-trips alongside
            the positions so cluster-scale workflows can encode per-source redshifts in a single spreadsheet.
        """
        self.name = name

        # Ensure positions is a Grid2DIrregular
        self.positions = (
            positions
            if isinstance(positions, aa.Grid2DIrregular)
            else aa.Grid2DIrregular(values=positions)
        )

        def broadcast_scalar(values):
            """
            Repeat a single numeric noise value once per observed position; pass anything else through.
            """
            # bool is an int subclass but is never a meaningful noise value, so it is left untouched.
            if isinstance(values, (int, float)) and not isinstance(values, bool):
                return [values] * len(self.positions)
            return values

        # Ensure positions_noise_map is an ArrayIrregular
        positions_noise_map = broadcast_scalar(positions_noise_map)
        self.positions_noise_map = (
            positions_noise_map
            if isinstance(positions_noise_map, aa.ArrayIrregular)
            else aa.ArrayIrregular(values=positions_noise_map)
        )

        def convert_to_array_irregular(values):
            """
            Convert data to ArrayIrregular if it is not already; ``None`` stays ``None``.
            """
            if values is None or isinstance(values, aa.ArrayIrregular):
                return values
            return aa.ArrayIrregular(values=values)

        # Convert fluxes, time delays and their noise maps to ArrayIrregular if provided as values and not
        # already this type. Scalar noise values are broadcast first, exactly as for the positions noise map,
        # so that every noise map holds one entry per image (which the CSV export relies on).
        self.fluxes = convert_to_array_irregular(fluxes)
        self.fluxes_noise_map = convert_to_array_irregular(
            broadcast_scalar(fluxes_noise_map)
        )
        self.time_delays = convert_to_array_irregular(time_delays)
        self.time_delays_noise_map = convert_to_array_irregular(
            broadcast_scalar(time_delays_noise_map)
        )

        self.redshift = float(redshift) if redshift is not None else None

    @property
    def info(self) -> str:
        """
        A human-readable, multi-line text summary of this dataset.

        The returned string holds one ``label : value`` line per attribute, each terminated by a newline.
        """
        return (
            f"name : {self.name}\n"
            f"positions : {self.positions}\n"
            f"positions_noise_map : {self.positions_noise_map}\n"
            f"fluxes : {self.fluxes}\n"
            f"fluxes_noise_map : {self.fluxes_noise_map}\n"
            f"time_delays : {self.time_delays}\n"
            f"time_delays_noise_map : {self.time_delays_noise_map}\n"
            f"redshift : {self.redshift}\n"
        )

    def extent_from(self, buffer: float = 0.1):
        """
        Return the bounding box of the positions, enlarged by ``buffer`` on every side.

        Parameters
        ----------
        buffer
            The margin added beyond the extreme y and x coordinates of the positions.

        Returns
        -------
        A list ``[y_min, y_max, x_min, x_max]``.
        """
        y_max = max(self.positions[:, 0]) + buffer
        y_min = min(self.positions[:, 0]) - buffer
        x_max = max(self.positions[:, 1]) + buffer
        x_min = min(self.positions[:, 1]) - buffer
        return [y_min, y_max, x_min, x_max]

    def to_csv(self, file_path: str):
        """
        Write this dataset to ``file_path`` as a CSV with one row per image.

        Optional flux / time-delay columns are included only when this dataset carries
        the corresponding values. For multi-dataset output use :func:`output_to_csv`.
        """
        output_to_csv([self], file_path)

    @classmethod
    def from_csv(
        cls, file_path: str, name: Optional[str] = None
    ) -> "PointDataset":
        """
        Load a single ``PointDataset`` from a CSV written by :meth:`to_csv` or
        :func:`output_to_csv`.

        Parameters
        ----------
        file_path
            Path to a CSV file with at minimum the columns
            ``name, y, x, positions_noise``.
        name
            The ``name`` group to load. Must be provided when the CSV contains more
            than one ``name``; when the CSV contains exactly one group it is picked
            automatically.

        Raises
        ------
        ValueError
            If the CSV holds no rows, if ``name`` is omitted while the CSV holds more than
            one group, or if no group called ``name`` exists.
        """
        datasets = list_from_csv(file_path)
        if not datasets:
            raise ValueError(
                f"CSV file {file_path!r} contained no PointDataset rows."
            )

        if name is None:
            if len(datasets) > 1:
                available = [d.name for d in datasets]
                raise ValueError(
                    f"CSV file {file_path!r} contains {len(datasets)} groups "
                    f"({available!r}); pass name= to select one."
                )
            return datasets[0]

        for dataset in datasets:
            if dataset.name == name:
                return dataset

        available = [d.name for d in datasets]
        raise ValueError(
            f"CSV file {file_path!r} has no group named {name!r}. "
            f"Available groups: {available!r}."
        )
def _optional_values(dataset: PointDataset, attr: str) -> Optional[List[float]]:
    """
    Return the attribute ``attr`` of ``dataset`` as a list of Python floats.

    ``None`` is returned unchanged when the attribute is not set, so callers can
    tell "no data" apart from an empty sequence.
    """
    stored = getattr(dataset, attr)
    return None if stored is None else [float(entry) for entry in stored]
def output_to_csv(datasets: List[PointDataset], file_path: str):
    """
    Write a list of ``PointDataset`` objects to one CSV file, one row per observed image.

    The base columns (``name, y, x, positions_noise``) are always written. The
    ``flux``/``flux_noise``, ``time_delay``/``time_delay_noise`` and ``redshift``
    columns are added when *any* dataset in ``datasets`` carries fluxes, time delays
    or a redshift respectively; datasets without those values leave the cells blank.

    When written, the ``redshift`` value is repeated on every row of its ``name``
    group, because it belongs to the source rather than to an individual image.

    This is the hand-editable / spreadsheet form preferred for strong-lens cluster
    workflows with tens or hundreds of multiply-imaged sources. For exact
    round-trip serialisation use ``output_to_json`` / ``from_json``.
    """

    def cell(values, index):
        # A blank cell stands in for data this particular dataset does not carry.
        if values is None:
            return ""
        return values[index]

    with_flux = any(d.fluxes is not None for d in datasets)
    with_time_delay = any(d.time_delays is not None for d in datasets)
    with_redshift = any(d.redshift is not None for d in datasets)

    headers = [*_BASE_HEADERS]
    if with_flux:
        headers.extend(_FLUX_HEADERS)
    if with_time_delay:
        headers.extend(_TIME_DELAY_HEADERS)
    if with_redshift:
        headers.extend(_REDSHIFT_HEADERS)

    rows = []
    for dataset in datasets:
        positions = dataset.positions
        positions_noise = _optional_values(dataset, "positions_noise_map")
        fluxes = _optional_values(dataset, "fluxes")
        fluxes_noise = _optional_values(dataset, "fluxes_noise_map")
        time_delays = _optional_values(dataset, "time_delays")
        time_delays_noise = _optional_values(dataset, "time_delays_noise_map")

        for index in range(len(positions)):
            row = {
                "name": dataset.name,
                "y": float(positions[index][0]),
                "x": float(positions[index][1]),
                "positions_noise": positions_noise[index],
            }
            if with_flux:
                row["flux"] = cell(fluxes, index)
                row["flux_noise"] = cell(fluxes_noise, index)
            if with_time_delay:
                row["time_delay"] = cell(time_delays, index)
                row["time_delay_noise"] = cell(time_delays_noise, index)
            if with_redshift:
                if dataset.redshift is None:
                    row["redshift"] = ""
                else:
                    row["redshift"] = dataset.redshift
            rows.append(row)

    csvable.output_to_csv(rows, file_path, headers=headers)
def _float_column(
    group_rows: List[dict], column: str, group_name: str
) -> Optional[List[float]]:
    """
    Read one optional per-image column of a ``name`` group as a list of floats.

    Parameters
    ----------
    group_rows
        The CSV rows (as dictionaries) belonging to a single ``name`` group.
    column
        The column to read. A row that lacks the key counts as blank.
    group_name
        The group's name, used only to make error messages actionable.

    Returns
    -------
    The column values as floats, or ``None`` when every row leaves the column blank
    (including when ``group_rows`` is empty).

    Raises
    ------
    ValueError
        If only some rows populate the column, or a populated cell is not numeric.
    """

    def is_blank(cell) -> bool:
        # Hand-edited spreadsheets often leave stray spaces in "empty" cells, so a
        # whitespace-only string counts as blank alongside "" and None.
        return cell is None or (isinstance(cell, str) and not cell.strip())

    cells = [row.get(column, "") for row in group_rows]
    blank_count = sum(1 for cell in cells if is_blank(cell))

    if blank_count == len(cells):
        return None
    if blank_count:
        raise ValueError(
            f"CSV group {group_name!r} has partially populated column "
            f"{column!r}; every row in the group must have a value or all be blank."
        )

    try:
        return [float(cell) for cell in cells]
    except ValueError as error:
        # Re-raise with the group and column so the offending cell can be located.
        raise ValueError(
            f"CSV group {group_name!r} has a non-numeric value in column "
            f"{column!r}: {error}"
        ) from error
def _group_redshift(
    group_rows: List[dict], group_name: str
) -> Optional[float]:
    """
    Read the single source redshift shared by every row of a ``name`` group.

    Parameters
    ----------
    group_rows
        The CSV rows (as dictionaries) belonging to a single ``name`` group.
    group_name
        The group's name, used only to make error messages actionable.

    Returns
    -------
    The redshift as a float, or ``None`` when every row leaves ``redshift`` blank
    (including when ``group_rows`` is empty).

    Raises
    ------
    ValueError
        If only some rows populate ``redshift``, a populated cell is not numeric, or
        the rows of the group disagree on the value.
    """

    def is_blank(cell) -> bool:
        # Hand-edited spreadsheets often leave stray spaces in "empty" cells, so a
        # whitespace-only string counts as blank alongside "" and None.
        return cell is None or (isinstance(cell, str) and not cell.strip())

    cells = [row.get("redshift", "") for row in group_rows]
    populated = [cell for cell in cells if not is_blank(cell)]

    if not populated:
        return None
    if len(populated) != len(cells):
        raise ValueError(
            f"CSV group {group_name!r} has partially populated column "
            f"'redshift'; every row in the group must have a value or all be blank."
        )

    try:
        values = [float(cell) for cell in populated]
    except ValueError as error:
        # Re-raise with the group name so the offending cell can be located.
        raise ValueError(
            f"CSV group {group_name!r} has a non-numeric value in column "
            f"'redshift': {error}"
        ) from error

    # Values are compared after parsing, so "1.5" and "1.50" count as identical.
    if any(v != values[0] for v in values):
        raise ValueError(
            f"CSV group {group_name!r} has inconsistent 'redshift' values "
            f"{values!r}; a source redshift must be identical across all of its "
            f"image rows."
        )
    return values[0]
def list_from_csv(file_path: str) -> List[PointDataset]:
    """
    Load a list of ``PointDataset`` objects from a CSV written by
    :func:`output_to_csv` (or :meth:`PointDataset.to_csv`).

    Rows are grouped by their ``name`` column, giving one ``PointDataset`` per distinct
    name in order of first appearance. A file with no rows gives an empty list.

    The optional per-image columns (``flux``/``flux_noise``,
    ``time_delay``/``time_delay_noise``) are resolved per group: a column populated on
    every row of the group is loaded, a column blank on every row becomes ``None``, and
    a partially populated column is rejected with a ``ValueError``.

    The optional ``redshift`` column is per-source (not per-image): every row within a
    group must share the same value, otherwise a ``ValueError`` is raised.

    Raises
    ------
    ValueError
        If one of the required columns ``name, y, x, positions_noise`` is missing, or a
        group fails the per-column checks described above.
    """
    rows = csvable.list_from_csv(file_path)
    if not rows:
        return []

    # The column names are taken from the keys of the first row.
    headers = list(rows[0].keys())

    missing = next(
        (required for required in _BASE_HEADERS if required not in headers), None
    )
    if missing is not None:
        raise ValueError(
            f"CSV file {file_path!r} is missing required column {missing!r}; "
            f"expected headers starting with {_BASE_HEADERS!r}."
        )

    # Plain dicts keep insertion order, which preserves first-appearance order.
    rows_by_name: "dict[str, List[dict]]" = {}
    for row in rows:
        key = row["name"]
        if key not in rows_by_name:
            rows_by_name[key] = []
        rows_by_name[key].append(row)

    def per_image(column: str, members: List[dict], label: str):
        # A column that is absent from the file is simply "not provided".
        if column not in headers:
            return None
        return _float_column(members, column, label)

    def source_redshift(members: List[dict], label: str):
        if "redshift" not in headers:
            return None
        return _group_redshift(members, label)

    datasets: List[PointDataset] = []
    for name, members in rows_by_name.items():
        # Keyword arguments are evaluated top to bottom, so columns are parsed
        # (and any error is raised) in this order.
        dataset = PointDataset(
            name=name,
            positions=[(float(r["y"]), float(r["x"])) for r in members],
            positions_noise_map=[float(r["positions_noise"]) for r in members],
            fluxes=per_image("flux", members, name),
            fluxes_noise_map=per_image("flux_noise", members, name),
            time_delays=per_image("time_delay", members, name),
            time_delays_noise_map=per_image("time_delay_noise", members, name),
            redshift=source_redshift(members, name),
        )
        datasets.append(dataset)

    return datasets