-
Notifications
You must be signed in to change notification settings - Fork 33
/
webservice.py
300 lines (246 loc) · 9.54 KB
/
webservice.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
"""Webservice for the haddock3 restraints module.
Exposes the haddock3-restraints CLI subcommands as a RESTful webservice.
Also includes endpoint for PDB preprocessing.
Run with:
```shell
uvicorn --port 5000 haddock.clis.restraints.webservice:app
```
The Swagger UI is running at http://127.0.0.1:5000/docs .
To pass a PDB file in the JSON body of a request,
it needs to be gzipped and then base64 encoded.
A base64 encoded gzipped PDB file can be made with:
```shell
cat examples/data/2oob.pdb | gzip | base64 -w 0 > 2oob.pdb.gz.base64
```
Background for PDB file handling:
To store a multiline string, like a pdb file,
in JSON we need to encode it in base64.
Base64 encoding makes things 1.33 times bigger.
A pdb is text which can be compressed a lot.
To transfer less data we can compress the pdb
with gzip before base64 encoding.
For example, the 2oob.pdb file (74.8Kb) becomes 101Kb when base64 encoded directly,
while gzipping it first and then base64 encoding it gives 25.4Kb.
"""
import gzip
import io
import tempfile
from base64 import b64decode
from contextlib import redirect_stdout
from typing import Annotated
from fastapi import FastAPI, HTTPException, status
from fastapi.middleware.gzip import GZipMiddleware
from pdbtools.pdb_chain import alter_chain
from pdbtools.pdb_fixinsert import fix_insertions
from pdbtools.pdb_selaltloc import select_by_occupancy
from pdbtools.pdb_selchain import select_chain
from pdbtools.pdb_tidy import tidy_pdbfile
from pdbtools.pdb_delhetatm import remove_hetatm
from pdbtools.pdb_keepcoord import keep_coordinates
from pydantic import BaseModel, Field
from starlette.responses import PlainTextResponse
from haddock.clis.restraints.calc_accessibility import (
apply_cutoff,
get_accessibility,
)
from haddock.clis.restraints.restrain_bodies import (
restrain_bodies as restrain_bodies_raw,
)
from haddock.libs.librestraints import (
active_passive_to_ambig,
check_parenthesis,
passive_from_active_raw,
validate_tbldata,
)
# ASGI application object; the module docstring shows it being served with
# `uvicorn ... haddock.clis.restraints.webservice:app`.
app = FastAPI()
# Gzip-compress responses. minimum_size=1000 is the threshold handed to the
# middleware (presumably in bytes -- confirm against the Starlette docs).
app.add_middleware(GZipMiddleware, minimum_size=1000)
def unpacked_structure(structure: str) -> bytes:
    """Decode a base64 string and gunzip the resulting bytes.

    Args:
        structure: Base64 text whose decoded content is gzip-compressed data.

    Returns:
        The decompressed payload as raw bytes (no text decoding is applied).
    """
    return gzip.decompress(b64decode(structure))
def unpacked_tbl(tbl: str) -> str:
    """Decode a base64 string, gunzip it and return the text of the tbl file.

    Args:
        tbl: Base64 text whose decoded content is gzip-compressed data.

    Returns:
        The decompressed payload decoded as UTF-8.
    """
    compressed = b64decode(tbl)
    raw = gzip.decompress(compressed)
    return raw.decode("utf-8")
# Type alias used by request models for fields that carry a structure file:
# a plain string annotated with schema metadata describing its encoding.
Structure = Annotated[
    str,
    Field(
        description="The structure file as a base64 encoded gzipped string.",
        json_schema_extra={
            "contentMediaType": "text/plain",
            "contentEncoding": "base64",
        },
    ),
]
# Request body accepted by the /passive_from_active endpoint.
class PassiveFromActiveRequest(BaseModel):
    # Structure file payload (base64 encoded, gzipped).
    structure: Structure
    # Required: no default is declared for this field.
    active: list[int] = Field(
        examples=[[1, 2, 3]],
        description="List of active restraints.",
    )
    chain: str = Field(
        description="The chain identifier.",
        default="A",
    )
    surface: list[int] = Field(
        description="List of surface restraints.",
        default=[],
    )
    radius: float = Field(
        description="The radius from active.",
        default=6.5,
    )
@app.post("/passive_from_active", tags=["restraints"])
def calculate_passive_from_active(
    request: PassiveFromActiveRequest,
) -> list[int]:
    """Calculate passive restraints from active restraints.

    The structure in the request is unpacked, written to a temporary file and
    handed, by file name, to `passive_from_active_raw` together with the
    active list, chain, surface list and radius from the request.

    Args:
        request: The parsed request body.

    Returns:
        The value returned by `passive_from_active_raw`.

    Raises:
        HTTPException: With status 422 when `passive_from_active_raw` raises
            any exception; the exception text is used as the detail.
    """
    structure = unpacked_structure(request.structure)
    with tempfile.NamedTemporaryFile() as structure_file:
        structure_file.write(structure)
        # The callee opens the file by name, so the buffered bytes must be
        # pushed to disk first; otherwise it may read a truncated or empty file.
        structure_file.flush()
        try:
            return passive_from_active_raw(
                structure=structure_file.name,
                active=request.active,
                chain_id=request.chain,
                surface=request.surface,
                radius=request.radius,
            )
        except Exception as e:
            raise HTTPException(
                status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, detail=str(e)
            ) from e
# Request body accepted by the /actpass_to_ambig endpoint.
# The four residue lists are required; both segids have defaults.
class ActPassToAmbigRequest(BaseModel):
    active1: list[int] = Field(
        examples=[[1, 2, 3]],
        description="List of active residues for the first model.",
    )
    active2: list[int] = Field(
        examples=[[4, 5, 6]],
        description="List of active residues for the second model.",
    )
    passive1: list[int] = Field(
        examples=[[7, 8, 9]],
        description="List of passive residues for the first model.",
    )
    passive2: list[int] = Field(
        examples=[[10, 11, 12]],
        description="List of passive residues for the second model.",
    )
    segid1: str = Field(
        description="Segid to use for the first model.",
        default="A",
    )
    segid2: str = Field(
        description="Segid to use for the second model.",
        default="B",
    )
@app.post("/actpass_to_ambig", response_class=PlainTextResponse, tags=["restraints"])
def calculate_actpass_to_ambig(
    request: ActPassToAmbigRequest,
) -> str:
    """Convert active and passive residues of two models into ambig restraints.

    Calls `active_passive_to_ambig` with the residue lists and segids from the
    request and returns whatever that function printed to standard output.

    Args:
        request: The parsed request body.

    Returns:
        The captured output with leading and trailing whitespace stripped.
    """
    output = io.StringIO()
    # `active_passive_to_ambig` reports its result by printing, so stdout is
    # captured for the duration of the call.
    # NOTE(review): redirect_stdout swaps the process-wide sys.stdout; if this
    # handler runs concurrently in several threads the captured text could
    # interleave -- confirm against the deployment model.
    with redirect_stdout(output):
        active_passive_to_ambig(
            active1=request.active1,
            passive1=request.passive1,
            active2=request.active2,
            passive2=request.passive2,
            segid1=request.segid1,
            segid2=request.segid2,
        )
    return output.getvalue().strip()
# Request body accepted by the /restrain_bodies endpoint.
class RestrainBodiesRequest(BaseModel):
    # Structure file payload (base64 encoded, gzipped).
    structure: Structure
    exclude: list[str] = Field(
        description="Chains to exclude from the calculation.",
        examples=[["B"]],
        default=[],
    )
@app.post("/restrain_bodies", response_class=PlainTextResponse, tags=["restraints"])
def restrain_bodies(request: RestrainBodiesRequest) -> str:
    """Create distance restraints to lock several chains together.

    The structure in the request is unpacked and written to a temporary file,
    whose name is passed to `restrain_bodies_raw` along with the chains to
    exclude. The text that function prints to standard output is returned.

    Args:
        request: The parsed request body.

    Returns:
        The captured output with leading and trailing whitespace stripped.
    """
    structure = unpacked_structure(request.structure)
    with tempfile.NamedTemporaryFile() as structure_file:
        structure_file.write(structure)
        # The callee opens the file by name, so the buffered bytes must be
        # pushed to disk first; otherwise it may read a truncated or empty file.
        structure_file.flush()
        output = io.StringIO()
        # `restrain_bodies_raw` reports its result by printing, so stdout is
        # captured for the duration of the call.
        with redirect_stdout(output):
            restrain_bodies_raw(
                structure=structure_file.name,
                exclude=request.exclude,
            )
    return output.getvalue().strip()
# Request body accepted by the /calc_accessibility endpoint.
class CalcAccessibilityRequest(BaseModel):
    # Structure file payload (base64 encoded, gzipped).
    structure: Structure
    # Required: no default is declared for this field.
    cutoff: float = Field(
        examples=[0.4],
        description="Relative cutoff for sidechain accessibility.",
    )
@app.post("/calc_accessibility", tags=["restraints"])
def calculate_accessibility(
    request: CalcAccessibilityRequest,
) -> dict[str, list[int]]:
    """Calculate the accessibility of the side chains and apply a cutoff.

    The structure in the request is unpacked and written to a temporary file,
    whose name is passed to `get_accessibility`. The result is then filtered
    with `apply_cutoff` using the cutoff from the request.

    Args:
        request: The parsed request body.

    Returns:
        The value returned by `apply_cutoff`.

    Raises:
        HTTPException: With status 422 when an `AssertionError` is raised
            during the calculation; the assertion text is used as the detail.
    """
    structure = unpacked_structure(request.structure)
    with tempfile.NamedTemporaryFile() as structure_file:
        structure_file.write(structure)
        # The callee opens the file by name, so the buffered bytes must be
        # pushed to disk first; otherwise it may read a truncated or empty file.
        structure_file.flush()
        try:
            access_dic = get_accessibility(structure_file.name)
            # Filter residues based on accessibility cutoff
            return apply_cutoff(access_dic, request.cutoff)
        except AssertionError as e:
            raise HTTPException(
                status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, detail=str(e)
            ) from e
# Request body accepted by the /validate_tbl endpoint.
class ValidateTblRequest(BaseModel):
    # Required: the TBL payload, with schema metadata describing its encoding.
    tbl: Annotated[
        str,
        Field(
            description="The TBL file as base64 encoded gzipped string.",
            json_schema_extra={
                "contentMediaType": "text/plain",
                "contentEncoding": "base64",
            },
        ),
    ]
    pcs: bool = Field(
        description="Flag to indicate if the TBL file is in PCS mode.",
        default=False,
    )
    quick: bool = Field(
        description="Check global formatting before going line by line (opening/closing parenthesis and quotation marks.",
        default=False,
    )
@app.post("/validate_tbl", response_class=PlainTextResponse, tags=["restraints"])
def validate_tbl(
    request: ValidateTblRequest,
) -> str:
    """Validate the contents of a TBL file.

    The TBL payload is unpacked; when `quick` is set, `check_parenthesis` is
    run on it first, and then the text is passed to `validate_tbldata`
    together with the `pcs` flag.

    Args:
        request: The parsed request body.

    Returns:
        The value returned by `validate_tbldata`.

    Raises:
        HTTPException: With status 422 when the payload cannot be unpacked or
            when one of the checks raises; the exception text is the detail.
    """
    try:
        # Unpacking happens inside the try so that a malformed base64, gzip or
        # UTF-8 payload is reported as a 422 like any other validation
        # failure, instead of escaping as an unhandled server error.
        tbl = unpacked_tbl(request.tbl)
        if request.quick:
            check_parenthesis(tbl)
        return validate_tbldata(tbl, request.pcs)
    except Exception as e:
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, detail=str(e)
        ) from e
# Request body accepted by the /preprocess_pdb endpoint.
class PDBPreprocessRequest(BaseModel):
    # Structure file payload (base64 encoded, gzipped).
    structure: Structure
    # Both chain fields are required: no defaults are declared.
    from_chain: str = Field(
        examples=["A"],
        description="Chains to keep",
    )
    to_chain: str = Field(
        examples=["A"],
        description="New chain identifier",
    )
    # Optional pipeline switches, both off by default.
    delhetatm: bool = Field(
        default=False,
        description="Delete HETATM records",
        examples=[True],
    )
    keepcoord: bool = Field(
        default=False,
        description="Remove all non-coordinate records",
        examples=[True],
    )
@app.post("/preprocess_pdb", response_class=PlainTextResponse, tags=["pdb"])
def preprocess_pdb(request: PDBPreprocessRequest) -> str:
    """Preprocess a PDB file.

    Runs the following [pdbtools](http://www.bonvinlab.org/pdb-tools/) pipeline:

    ```shell
    cat pdb | pdb_tidy -strict | pdb_selchain -<from_chain> | pdb_chain -<to_chain> | pdb_fixinsert | pdb_selaltloc | pdb_tidy -strict
    ```

    or with `delhetatm` and `keepcoord` set to true:

    ```shell
    cat pdb | pdb_tidy -strict | pdb_selchain -<from_chain> | pdb_chain -<to_chain> | pdb_delhetatm | pdb_fixinsert | pdb_keepcoord | pdb_selaltloc | pdb_tidy -strict
    ```
    """
    # The payload is decoded as latin_1 and split into individual lines.
    records = unpacked_structure(request.structure).decode("latin_1").splitlines()

    # Assemble the stages in execution order; optional ones are slotted in at
    # the position shown in the pipelines above.
    stages = [
        lambda recs: tidy_pdbfile(recs, strict=True),
        lambda recs: select_chain(recs, request.from_chain),
        lambda recs: alter_chain(recs, request.to_chain),
    ]
    if request.delhetatm:
        stages.append(remove_hetatm)
    stages.append(lambda recs: fix_insertions(recs, []))
    if request.keepcoord:
        stages.append(keep_coordinates)
    stages.append(select_by_occupancy)
    stages.append(lambda recs: tidy_pdbfile(recs, strict=True))

    for stage in stages:
        # Each stage is fully materialised before the next one starts.
        records = list(stage(records))
    return "".join(records)