Skip to content

Commit 99e78ff

Browse files
RyanMullinsLIT team
authored and
LIT team
committed
Changes model and dataset loading to use init_spec() and constructors.
* Changes Global Settings to provide UI elements based on the Model or Dataset's init_spec() contained in a LitApp's metadata. * Changes the create_dataset and create_model HTTP APIs to: * Accepting and validating config objects via the POST data * Initialize Models and Datasets by passing configs to Python constructors * Raising errors instead of returning None, so that the user can see where/understand why instantiation failed. * Updating the client's ApiService to pass init_spec configs from the UI to the HTTP Requests. PiperOrigin-RevId: 523096706
1 parent c0ff131 commit 99e78ff

File tree

11 files changed

+549
-187
lines changed

11 files changed

+549
-187
lines changed

lit_nlp/app.py

+99-36
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
import functools
1818
import glob
1919
import math
20-
import os
2120
import random
2221
import threading
2322
import time
@@ -56,6 +55,39 @@
5655
ModelLoadersMap = dict[str, ModelLoader]
5756

5857

58+
# TODO(b/277249726): Move this function to utils.py, add tests, and expand usage
59+
# across HTTP and Interpreter APIs.
60+
def _validate_config_against_spec(config: JsonDict, spec: types.Spec):
61+
"""Validates that the provided config is compatible with the Spec.
62+
63+
Args:
64+
config: The configuration parameters, typically extracted from the data of
65+
an HTTP Request, that are to be used in a function call.
66+
spec: A Spec defining the shape of allowed configuration parameters for the
67+
associated LIT component.
68+
69+
Raises:
70+
KeyError: Under two conditions: 1) the `config` is missing one or more
71+
required fields defined in the `spec`, or 2) the `config` contains fields
72+
not defined in the `spec`. Either of these conditions would likely result
73+
in a TypeError (for missing or unexpected arguments) if the `config` was
74+
used in a call.
75+
"""
76+
missing_required_keys = [
77+
param_name for param_name, param_type in spec.items()
78+
if param_type.required and param_name not in config
79+
]
80+
if missing_required_keys:
81+
raise KeyError(f'Missing required parameters: {missing_required_keys}')
82+
83+
unsupported_keys = [
84+
param_name for param_name in config
85+
if param_name not in spec
86+
]
87+
if unsupported_keys:
88+
raise KeyError(f'Received unsupported parameters: {unsupported_keys}')
89+
90+
5991
class LitApp(object):
6092
"""LIT WSGI application."""
6193

@@ -300,44 +332,68 @@ def _get_dataset(self,
300332
return list(self._datasets[dataset_name].indexed_examples)
301333

302334
def _create_dataset(self,
303-
unused_data,
335+
data: JsonDict,
304336
dataset_name: Optional[str] = None,
305-
dataset_path: Optional[str] = None,
306337
**unused_kw):
307-
"""Create dataset from a path, updating and returning the metadata."""
338+
"""Create a dataset, updating and returning the metadata."""
339+
if dataset_name is None:
340+
raise ValueError('No base dataset specified.')
308341

309-
assert dataset_name is not None, 'No dataset specified.'
310-
assert dataset_path is not None, 'No dataset path specified.'
311-
312-
new_dataset = self._datasets[dataset_name].load(dataset_path)
313-
if new_dataset is not None:
314-
new_dataset_name = dataset_name + '-' + os.path.basename(dataset_path)
315-
self._datasets[new_dataset_name] = new_dataset
316-
self._info = self._build_metadata()
317-
return (self._info, new_dataset_name)
318-
else:
319-
logging.error('Not able to load: %s', dataset_name)
320-
return None
342+
config: Optional[JsonDict] = data.get('config')
343+
if config is None:
344+
raise ValueError('No config specified.')
345+
346+
new_name: Optional[str] = config.pop('new_name', None)
347+
if new_name is None:
348+
raise ValueError('No name provided for the new dataset.')
349+
350+
if (loader_info := self._dataset_loaders.get(dataset_name)) is None:
351+
raise ValueError(
352+
f'No loader information (Cls + init_spec) found for {dataset_name}')
353+
354+
dataset_cls, dataset_init_spec = loader_info
355+
356+
if dataset_init_spec is not None:
357+
_validate_config_against_spec(config, dataset_init_spec)
358+
359+
new_dataset = dataset_cls(**config)
360+
annotated_dataset = self._run_annotators(new_dataset)
361+
self._datasets[new_name] = lit_dataset.IndexedDataset(
362+
base=annotated_dataset, id_fn=caching.input_hash
363+
)
364+
self._info = self._build_metadata()
365+
return (self._info, new_name)
321366

322367
def _create_model(self,
323-
unused_data,
368+
data: JsonDict,
324369
model_name: Optional[str] = None,
325-
model_path: Optional[str] = None,
326370
**unused_kw):
327-
"""Create model from a path, updating and returning the metadata."""
328-
329-
assert model_name is not None, 'No model specified.'
330-
assert model_path is not None, 'No model path specified.'
331-
# Load using the underlying model class, then wrap explicitly in a cache.
332-
new_model = self._models[model_name].wrapped.load(model_path)
333-
if new_model is not None:
334-
new_model_name = model_name + ':' + os.path.basename(model_path)
335-
self._models[new_model_name] = caching.CachingModelWrapper(
336-
new_model, new_model_name, cache_dir=self._data_dir)
337-
self._info = self._build_metadata()
338-
return (self._info, new_model_name)
339-
else:
340-
return None
371+
"""Create a model, updating and returning the metadata."""
372+
if model_name is None:
373+
raise ValueError('No base model specified.')
374+
375+
config: Optional[JsonDict] = data.get('config')
376+
if config is None:
377+
raise ValueError('No config specified.')
378+
379+
new_name: Optional[str] = config.pop('new_name', None)
380+
if new_name is None:
381+
raise ValueError('No name provided for the new model.')
382+
383+
if (loader_info := self._model_loaders.get(model_name)) is None:
384+
raise ValueError(
385+
f'No loader information (Cls + init_spec) found for {model_name}')
386+
387+
model_cls, model_init_spec = loader_info
388+
389+
if model_init_spec is not None:
390+
_validate_config_against_spec(config, model_init_spec)
391+
392+
new_model = model_cls(**config)
393+
self._models[new_name] = caching.CachingModelWrapper(
394+
new_model, new_name, cache_dir=self._data_dir)
395+
self._info = self._build_metadata()
396+
return (self._info, new_name)
341397

342398
def _get_generated(self, data, model: str, dataset_name: str, generator: str,
343399
**unused_kw):
@@ -505,12 +561,19 @@ def _handler(app: wsgi_app.App, request, environ):
505561
# but for requests from Python we may want to use the invertible encoding
506562
# so that datatypes from remote models are the same as local ones.
507563
response_simple_json = utils.coerce_bool(
508-
kw.pop('response_simple_json', True))
564+
kw.pop('response_simple_json', True)
565+
)
509566
data = serialize.from_json(request.data) if len(request.data) else None
510567
# Special handling to dereference IDs.
511-
if data and 'inputs' in data.keys() and 'dataset_name' in kw:
512-
data['inputs'] = self._reconstitute_inputs(data['inputs'],
513-
kw['dataset_name'])
568+
if (
569+
data
570+
and 'inputs' in data.keys()
571+
and len(data.get('inputs'))
572+
and 'dataset_name' in kw
573+
):
574+
data['inputs'] = self._reconstitute_inputs(
575+
data['inputs'], kw['dataset_name']
576+
)
514577

515578
outputs = fn(data, **kw)
516579
response_body = serialize.to_json(outputs, simple=response_simple_json)

lit_nlp/client/core/global_settings.css

+69-1
Original file line numberDiff line numberDiff line change
@@ -160,9 +160,9 @@ a:hover {
160160

161161
.config-list {
162162
border: 1px solid #dadce0;
163+
flex: 1 1;
163164
margin: 12pt;
164165
overflow-y: scroll;
165-
height: 100%;
166166
}
167167

168168
.config-line {
@@ -200,6 +200,74 @@ a:hover {
200200
margin-left: 40pt;
201201
}
202202

203+
.model-dataset-loading {
204+
flex: 1 1;
205+
margin: 0 12pt;
206+
width: calc(100% - 24pt);
207+
}
208+
209+
.model-dataset-loading .controls {
210+
border: 1px solid var(--lit-neutral-300);
211+
display: flex;
212+
flex-grow: 1;
213+
flex-direction: row;
214+
padding: 16px;
215+
}
216+
217+
.model-dataset-loading .controls .actions {
218+
align-items: end;
219+
row-gap: 4px;
220+
}
221+
222+
.model-dataset-loading .controls .actions,
223+
.model-dataset-loading .controls .selector {
224+
display: flex;
225+
flex-direction: column;
226+
flex-shrink: 0;
227+
}
228+
229+
.model-dataset-loading .controls .options {
230+
display: flex;
231+
flex-direction: column;
232+
flex-grow: 1;
233+
height: 100%;
234+
max-height: 100%;
235+
overflow-y: scroll;
236+
padding: 0 12pt;
237+
row-gap: 8px;
238+
}
239+
240+
.model-dataset-loading .controls .options .option-entry {
241+
align-items: center;
242+
column-gap: 4px;
243+
display: flex;
244+
flex-direction: row;
245+
}
246+
247+
/* The first child of .options is always the new_name field, which is special */
248+
.model-dataset-loading .controls .options .option-entry:first-child {
249+
border-bottom: 1px solid var(--lit-neutral-300);
250+
padding-bottom: 8px;
251+
}
252+
253+
.model-dataset-loading .controls .options .option-entry label {
254+
flex: 0 0 200px;
255+
}
256+
257+
.model-dataset-loading .controls .options .option-entry lit-input-field {
258+
flex-grow: 1;
259+
}
260+
261+
.model-dataset-loading .header {
262+
color: var(--lit-neutral-700);
263+
font-size: 16px;
264+
margin-bottom: 12pt;
265+
}
266+
267+
#bottombar .status-area {
268+
color: var(--google-red-500);
269+
}
270+
203271
.expanded-info {
204272
display: flex;
205273
max-height: 0;

0 commit comments

Comments
 (0)