Skip to content

Reference

login(username=None, password=None)

Log in to the GISAID Flu database, parse the element ids and store them in a credentials object.

Parameters:

Name Type Description Default
username str

The username to log in with. If not provided, it will be fetched from the environment variable "GISAID_USERNAME".

None
password str

The password to log in with. If not provided, it will be fetched from the environment variable "GISAID_PASSWORD".

None
Return

credentials

Example
# Log in with provided username and password
gisflu.login("myusername", "mypassword")

# Log in using environment variables
gisflu.login()
Source code in src/gisflu/login.py
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
def _parseFormItemCeids(pageText: str) -> dict:
    """
    Collect the element ids (ceids) of the form items declared on a page.

    Every ``createFI(...function`` snippet found in ``pageText`` contributes one
    entry: the key is the identifier that follows the widget type, the value is
    the first ``createFI`` argument.
    """
    itemCeids = {}
    for snippet in re.findall(r"createFI\(.+?function", pageText):
        ident = re.search(r"Widget\',\'(.+?)\',function", snippet).group(1)
        ceid = re.search(r"createFI\(\'(.+?)\',", snippet).group(1)
        itemCeids[ident] = ceid
    return itemCeids


def _buildFormatSwitchCommands(cred: credentials, formatValue: str) -> list:
    """
    Build the command pipeline that sets the download-format item to ``formatValue``.

    The pipeline is ``setTarget`` + ``ChangeValue`` on the download-format ceid,
    followed by ``ShowProteins``.
    """
    compId = cred.downloadPage["resultDownloadCompId"]
    formatCeid = cred.downloadParamsCeid["downloadFormat"]
    return [
        buildCommand(
            CompId=compId,
            cmd="setTarget",
            params={"cvalue": formatValue, "ceid": formatCeid},
            equiv=f"ST{formatCeid}",
        ),
        buildCommand(
            CompId=compId,
            cmd="ChangeValue",
            params={"cvalue": formatValue, "ceid": formatCeid},
            equiv=f"CV{formatCeid}",
        ),
        buildCommand(
            CompId=compId,
            cmd="ShowProteins",
            params={"ceid": formatCeid},
        ),
    ]


def login(username: str | None = None, password: str | None = None) -> credentials:
    """
    Log in to the GISAID Flu database, parse the element ids and store them in a credentials object.

    The function walks through the login, first, home, browse, result and
    download pages once, recording the page ids, component ids and form-item
    ceids it finds on the way, and finally navigates back to the browse page.

    Args:
        username (str, optional): The username to log in with. If not provided, it will be fetched from the environment variable "GISAID_USERNAME".
        password (str, optional): The password to log in with. If not provided, it will be fetched from the environment variable "GISAID_PASSWORD".

    Return:
        credentials

    Raises:
        AssertionError: If a credential is neither passed nor set in the
            environment, or if the login response does not contain "cms_page".
        AttributeError: If an expected marker is missing from a response
            (``re.search`` returns ``None``).

    Example:
        ```
        # Log in with provided username and password
        gisflu.login("myusername", "mypassword")

        # Log in using environment variables
        gisflu.login()
        ```
    """

    cred = credentials()

    # get username and password; each one falls back to its own environment
    # variable, so an explicitly passed value is never overwritten
    if username is None or password is None:
        logger.debug(
            "Username or password not provided, fetching the missing value(s) from environment variables"
        )

        if username is None:
            username = os.getenv("GISAID_USERNAME")
        if password is None:
            password = os.getenv("GISAID_PASSWORD")

        # raised explicitly (not via `assert`) so the checks survive `python -O`
        if username is None:
            raise AssertionError(
                'Please set the environment variable "GISAID_USERNAME"'
            )
        if password is None:
            raise AssertionError(
                'Please set the environment variable "GISAID_PASSWORD"'
            )

    # the doLogin command below is sent the MD5 hex digest, not the plain password
    password_md5 = hashlib.md5(password.encode()).hexdigest()

    # fetch sessionId first
    res = httpGet(cred.url, headers=cred.headers)
    cred.sessionId = re.search(r'name="sid" value=\'(.+?)\'', res.text).group(1)
    logger.debug(f"Get sessionId: {cred.sessionId}")

    # then get login page, to get more ids
    res = httpGet(f"{cred.url}?sid={cred.sessionId}", headers=cred.headers)
    loginPageText = res.text
    cred.windowId = re.search(r'sys\["WID"\] = "(.+?)";', loginPageText).group(1)
    cred.loginPage["pid"] = re.search(r'sys\["PID"\] = "(.+?)";', loginPageText).group(
        1
    )
    cred.loginPage["loginCompId"] = re.search(
        r"sys.getC\(\'(.+?)\'\).call\(\'doLogin\'", loginPageText
    ).group(1)

    # login by command pipeline
    cmdPipe = [
        buildCommand(
            CompId=cred.loginPage["loginCompId"],
            cmd="doLogin",
            params={"login": username, "hash": password_md5},
        )
    ]

    body = buildRequestBody(
        cred.sessionId, cred.windowId, cred.loginPage["pid"], cmdPipe, mode="ajax"
    )

    res = httpPost(cred.url, data=body, headers=cred.headers)
    # explicit raise: a rejected login must never be skipped under `python -O`
    if not re.search("cms_page", res.text):
        raise AssertionError("Username or password wrong!")
    logger.debug("username and password validated!")

    # first page after login
    logger.debug("Go to first page...")
    res = httpGet(f"{cred.url}?sid={cred.sessionId}", headers=cred.headers)
    firstPageText = res.text
    cred.firstPage["pid"] = re.search(r'sys\["PID"\] = "(.+?)";', firstPageText).group(
        1
    )
    cred.firstPage["dbSwitchCompId"] = re.search(
        r"sys.call\(\'(.+?)\',\'Go\'", firstPageText
    ).group(1)

    # fetch flu home page id by command pipeline
    logger.debug("Go to flu homepage...")
    cmdPipe = [
        buildCommand(
            CompId=cred.firstPage["dbSwitchCompId"], cmd="Go", params={"page": "epi3"}
        )
    ]

    body = buildRequestBody(
        cred.sessionId, cred.windowId, cred.firstPage["pid"], cmdPipe
    )

    res = httpPost(cred.url, data=body, headers=cred.headers)
    homePagePid = re.search(r"sys.goPage\(\'(.+?)\'\)", res.text).group(1)
    cred.homePage["pid"] = homePagePid

    # go to flu home page
    res = httpGet(
        f"{cred.url}?sid={cred.sessionId}&pid={homePagePid}", headers=cred.headers
    )
    homePageText = res.text

    ################## browse page ####################
    logger.debug("Parse browse page...")

    # fetch browse(search) page id
    cred.homePage["browseCompId"] = re.search(
        r"class=\"sys-actionbar-action-ni\" onclick=\"sys.getC\(\'(.+?)\'\)",
        homePageText,
    ).group(1)

    cmdPipe = [buildCommand(CompId=cred.homePage["browseCompId"], cmd="Browse")]

    body = buildRequestBody(
        cred.sessionId, cred.windowId, cred.homePage["pid"], cmdPipe
    )

    res = httpPost(cred.url, data=body, headers=cred.headers)

    browsePagePid = re.search(r"sys.goPage\(\'(.+?)\'\)", res.text).group(1)
    cred.browsePage["pid"] = browsePagePid

    # go to browse page
    res = httpGet(
        f"{cred.url}?sid={cred.sessionId}&pid={browsePagePid}", headers=cred.headers
    )
    browsePageText = res.text

    cred.browsePage["browseFormCompId"] = re.search(
        r"sys\.createComponent\(\'(c_\w+?)\',\'IsolateBrowseFormComponent\'",
        browsePageText,
    ).group(1)

    cred.browsePage["searchButtonCompId"] = re.search(
        r"sys\.createComponent\(\'(c_\w+?)\',\'IsolateSearchButtonsComponent\'",
        browsePageText,
    ).group(1)

    # fetch browse component event id
    browseItemDict = _parseFormItemCeids(browsePageText)

    cred.browseParamsCeid["type"] = browseItemDict["isl_type"]
    cred.browseParamsCeid["HA"] = browseItemDict["isl_subtype_h"]
    cred.browseParamsCeid["NA"] = browseItemDict["isl_subtype_n"]
    cred.browseParamsCeid["lineage"] = browseItemDict["isl_lineage"]
    cred.browseParamsCeid["host"] = browseItemDict["isl_host"]
    cred.browseParamsCeid["location"] = browseItemDict["isl_location"]
    cred.browseParamsCeid["collectDateFrom"] = browseItemDict["isl_collect_date_from"]
    cred.browseParamsCeid["collectDateTo"] = browseItemDict["isl_collect_date_to"]

    ################## result page ####################
    logger.debug("Parse result page...")

    # fetch result page id
    cmdPipe = [buildCommand(CompId=cred.browsePage["searchButtonCompId"], cmd="search")]
    body = buildRequestBody(
        cred.sessionId, cred.windowId, cred.browsePage["pid"], cmdPipe
    )
    res = httpPost(cred.url, data=body, headers=cred.headers)
    resultPagePid = re.search(r"sys.goPage\(\'(.+?)\'\)", res.text).group(1)
    cred.resultPage["pid"] = resultPagePid

    # go to result page
    res = httpGet(
        f"{cred.url}?sid={cred.sessionId}&pid={resultPagePid}", headers=cred.headers
    )
    resultPageText = res.text
    cred.resultPage["resultCompId"] = re.search(
        r"sys\.createComponent\(\'(c_\w+?)\',\'IsolateResultListComponent\'",
        resultPageText,
    ).group(1)
    cred.resultPage["downloadCompId"] = re.search(
        r"sys\.createComponent\(\'(c_\w+?)\',\'IsolateDownloadButtonComponent\'",
        resultPageText,
    ).group(1)

    # parse result table header: map each column key to its label
    tableHeaderText = re.findall(r"new Object\(\{\'label.+?cid", resultPageText)

    for s in tableHeaderText:
        label = re.search(r"label\':\'([\w ]+?)\'", s).group(1)
        key = re.search(r"key\':\'(\w+?)\'", s).group(1)
        cred.resultHeaderDict[key] = label

    ################## download page ####################
    logger.debug("Parse download page...")

    # get a temp record: a record has to be selected before the download
    # command below is issued
    cmdPipe = [
        buildCommand(
            CompId=cred.resultPage["resultCompId"],
            cmd="SetPaginating",
            params={"start_index": 0, "rows_per_page": 27},
        ),
        buildCommand(CompId=cred.resultPage["resultCompId"], cmd="GetData"),
    ]

    body = buildRequestBody(
        cred.sessionId, cred.windowId, cred.resultPage["pid"], cmdPipe
    )
    res = httpPost(cred.url, data=body, headers=cred.headers)

    tempRecordId = res.json()["records"][0]["b"]

    # select this temp record
    cmdPipe = [
        buildCommand(
            CompId=cred.resultPage["resultCompId"],
            cmd="ChangeValue",
            params={"row_id": tempRecordId, "col_name": "c", "value": True},
        ),
        buildCommand(CompId=cred.resultPage["downloadCompId"], cmd="Download"),
    ]

    body = buildRequestBody(
        cred.sessionId, cred.windowId, cred.resultPage["pid"], cmdPipe
    )

    res = httpPost(cred.url, data=body, headers=cred.headers)

    cred.downloadWindowId, cred.downloadPage["pid"] = re.search(
        r"sys.openOverlay\(\'(\w+?)\',\'(\w+?)\'", res.text
    ).group(1, 2)

    # go to download overlay page
    res = httpGet(
        f'{cred.url}?sid={cred.sessionId}&pid={cred.downloadPage["pid"]}',
        headers=cred.headers,
    )
    downloadPageText = res.text

    cred.downloadPage["resultDownloadCompId"] = re.search(
        r"sys\.createComponent\(\'(c_\w+?)\',\'IsolateResultDownloadComponent\'",
        downloadPageText,
    ).group(1)

    # fetch download item ceid
    downloadItemDict = _parseFormItemCeids(downloadPageText)

    cred.downloadParamsCeid["downloadFormat"] = downloadItemDict["format"]
    cred.downloadParamsCeid["downloadConfirm"] = downloadItemDict["download"]

    # fetch protein segment ceid
    body = buildRequestBody(
        cred.sessionId,
        cred.downloadWindowId,
        cred.downloadPage["pid"],
        _buildFormatSwitchCommands(cred, "proteins"),
    )

    res = httpPost(cred.url, data=body, headers=cred.headers)

    downloadProteinText = res.text

    cred.downloadParamsCeid["proteinSegment"] = re.search(
        r"createFI\(\'(\w+?)\',\'CheckboxWidget\',\'proteins\'", downloadProteinText
    ).group(1)

    # fetch dna segment ceid
    body = buildRequestBody(
        cred.sessionId,
        cred.downloadWindowId,
        cred.downloadPage["pid"],
        _buildFormatSwitchCommands(cred, "dna"),
    )

    res = httpPost(cred.url, data=body, headers=cred.headers)

    downloadDNAText = res.text

    cred.downloadParamsCeid["dnaSegment"] = re.search(
        r"createFI\(\'(\w+?)\',\'CheckboxWidget\',\'dna\'", downloadDNAText
    ).group(1)

    cred.downloadParamsCeid["fastaHeader"] = re.search(
        r"createFI\(\'(\w+?)\',\'EntryWidget\',\'header\'", downloadDNAText
    ).group(1)

    ################## return browse page ####################
    downloadToResultPage(cred)
    resultToBrowsePage(cred)
    logger.debug(f"{username} logged!")

    return cred

search(cred, type=None, HA=None, NA=None, host=None, collectDateFrom=None, collectDateTo=None, recordLimit=50)

Search for records in the GISAID Flu database based on specified criteria.

Parameters:

Name Type Description Default
cred credentials

The credentials object containing session information.

required
type List[str]

A list of virus types to filter the search results. Defaults to None.

None
HA List[str]

A list of hemagglutinin (HA) subtypes to filter the search results. Defaults to None.

None
NA List[str]

A list of neuraminidase (NA) subtypes to filter the search results. Defaults to None.

None
host List[str]

A list of host species to filter the search results. Defaults to None.

None
collectDateFrom str

The starting date for the collection date filter. Defaults to None.

None
collectDateTo str

The ending date for the collection date filter. Defaults to None.

None
recordLimit int

The maximum number of records to return. Defaults to 50.

50
Return

pd.DataFrame: A DataFrame containing the search results.

Example
cred = gisflu.login()
gisflu.search(cred, type=["A"], HA=["3"], NA=["2"],
    collectDateFrom="2020-01-01", recordLimit=10)
Source code in src/gisflu/browse.py
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
def search(
    cred: credentials,
    type: List[str] | None = None,
    HA: List[str] | None = None,
    NA: List[str] | None = None,
    host: List[str] | None = None,
    collectDateFrom: str | None = None,
    collectDateTo: str | None = None,
    recordLimit: int = 50,
) -> pd.DataFrame:
    """
    Search for records in the GISAID Flu database based on specified criteria.

    Args:
        cred (credentials): The credentials object containing session information.
        type (List[str], optional): A list of virus types to filter the search results. Defaults to None.
        HA (List[str], optional): A list of hemagglutinin (HA) subtypes to filter the search results. Defaults to None.
        NA (List[str], optional): A list of neuraminidase (NA) subtypes to filter the search results. Defaults to None.
        host (List[str], optional): A list of host species to filter the search results. Defaults to None.
        collectDateFrom (str, optional): The starting date for the collection date filter. Defaults to None.
        collectDateTo (str, optional): The ending date for the collection date filter. Defaults to None.
        recordLimit (int, optional): The maximum number of records to return. Defaults to 50.
            A value of 0 or less yields an empty DataFrame.

    Return:
        pd.DataFrame: A DataFrame containing the search results. It is empty
        when nothing matches or when ``recordLimit`` is not positive.

    Raises:
        AttributeError: If an expected marker (record totals, result page id,
            result component id) is missing from a response, because
            ``re.search`` then returns ``None``.

    Example:
        ```
        cred = gisflu.login()
        gisflu.search(cred, type=["A"], HA=["3"], NA=["2"],
            collectDateFrom="2020-01-01", recordLimit=10)
        ```
    """

    # search by command pipeline: only filters with a truthy value are sent,
    # in the same fixed order as the parameters
    filters = [
        ("type", type),
        ("HA", HA),
        ("NA", NA),
        ("host", host),
        ("collectDateFrom", collectDateFrom),
        ("collectDateTo", collectDateTo),
    ]

    cmdPipe = []
    for filterName, filterValue in filters:
        if filterValue:
            cmdPipe += buildBrowseCommand(cred, filterName, filterValue)

    body = buildRequestBody(
        cred.sessionId, cred.windowId, cred.browsePage["pid"], cmdPipe
    )

    res = httpPost(cred.url, data=body, headers=cred.headers)

    # records count in the browse page
    preResultText = res.text

    # the totals are printed with thousands separators, e.g. "1,234"
    recordCount, recordSeqCount = [
        int(i.replace(",", ""))
        for i in re.search(
            r"Total: ([\d,]+) viruses \(([\d,]+) sequences\)", preResultText
        ).group(1, 2)
    ]

    logger.info(f"{recordCount} records, {recordSeqCount} seqs found")

    # refresh result page id
    cmdPipe = [buildCommand(CompId=cred.browsePage["searchButtonCompId"], cmd="search")]
    body = buildRequestBody(
        cred.sessionId, cred.windowId, cred.browsePage["pid"], cmdPipe
    )
    res = httpPost(cred.url, data=body, headers=cred.headers)
    resultPagePid = re.search(r"sys.goPage\(\'(.+?)\'\)", res.text).group(1)
    cred.resultPage["pid"] = resultPagePid

    logger.debug("Parse result page...")
    # go to result page
    res = httpGet(
        f"{cred.url}?sid={cred.sessionId}&pid={resultPagePid}", headers=cred.headers
    )
    resultPageText = res.text
    cred.resultPage["resultCompId"] = re.search(
        r"sys\.createComponent\(\'(c_\w+?)\',\'IsolateResultListComponent\'",
        resultPageText,
    ).group(1)

    logger.debug("Fetch result records...")
    # fetch records; guarding on the number actually requested keeps a
    # non-positive recordLimit from producing a negative batch end index
    fetchCount = min(recordCount, recordLimit)
    if fetchCount > 0:
        resultJson = []

        batches = buildBatch(0, fetchCount - 1, batchSize=27)
        for batch in tqdm(batches):
            cmdPipe = [
                buildCommand(
                    CompId=cred.resultPage["resultCompId"],
                    cmd="SetPaginating",
                    params={
                        "start_index": batch["start"],
                        "rows_per_page": batch["count"],
                    },
                ),
                buildCommand(CompId=cred.resultPage["resultCompId"], cmd="GetData"),
            ]

            body = buildRequestBody(
                cred.sessionId, cred.windowId, cred.resultPage["pid"], cmdPipe
            )
            res = httpPost(cred.url, data=body, headers=cred.headers)

            resultJson += res.json()["records"]

        # records dataframe
        resultDF = pd.DataFrame(resultJson)

        # keep only the columns that appear in the parsed table header ...
        resultDF = resultDF.drop(
            [s for s in resultDF.columns if s not in cred.resultHeaderDict.keys()],
            axis=1,
        )

        # ... and show them under their header labels
        resultDF = resultDF.rename(columns=cred.resultHeaderDict)

        # errors="ignore": a column missing from the header must not abort the search
        resultDF = resultDF.drop(
            ["__toggle__", "edit", "HE", "P3"], axis=1, errors="ignore"
        )

        # these cells hold markup; keep only the text between the tags
        for col in ["Name", "PB2", "PB1", "PA", "HA", "NP", "NA", "MP", "NS"]:
            if col not in resultDF.columns:
                continue
            resultDF[col] = resultDF[col].str.replace(
                r"^.+?>(.+?)</.+$", r"\1", regex=True
            )
    else:
        resultDF = pd.DataFrame()

    resultToBrowsePage(cred)

    nrow = resultDF.shape[0]
    logger.debug(f"Search completed: return {nrow} rows")

    return resultDF

download(cred, isolateIds, downloadType='protein', segments=['HA', 'NA'], filename=None)

Downloads records for the given isolate IDs.

Parameters:

Name Type Description Default
cred object

The credentials object.

required
isolateIds list

List of isolate IDs to download data for.

required
downloadType str

The type of data to download. Defaults to "protein".

'protein'
segments list

List of segments to download. Defaults to ["HA", "NA"].

['HA', 'NA']
filename str

The name of the file to save the downloaded data. If not provided, a default filename will be generated.

None
Return

None

Example
cred = gisflu.login()
isolateIds = ["EPI_ISL_19185107", "EPI_ISL_19151100"]
gisflu.download(cred, isolateIds, downloadType="protein", segments=["HA", "NA"],
    filename="records.fasta")
Source code in src/gisflu/download.py
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
def download(
    cred: credentials,
    isolateIds: List[str],
    downloadType: str = "protein",
    segments: List[str] = ["HA", "NA"],
    filename: str | None = None,
) -> None:
    """
    Downloads records for the given isolate IDs.

    Args:
        cred (object): The credentials object.
        isolateIds (list): List of isolate IDs to download data for. Every ID must start with "EPI_ISL_".
        downloadType (str, optional): The type of data to download, one of "metadata", "protein" or "dna". Defaults to "protein".
        segments (list, optional): List of segments to download. Defaults to ["HA", "NA"]. The list is only read, never modified.
        filename (str, optional): The name of the file to save the downloaded data. If not provided, a default filename will be generated.

    Return:
        None

    Raises:
        AssertionError: If an isolate ID does not start with "EPI_ISL_", if
            ``downloadType`` is not supported, or if ``segments`` contains an
            unknown segment name.
        AttributeError: If an expected marker is missing from a response
            (``re.search`` returns ``None``).

    Example:
        ```
        cred = gisflu.login()
        isolateIds = ["EPI_ISL_19185107", "EPI_ISL_19151100"]
        gisflu.download(cred, isolateIds, downloadType="protein", segments=["HA", "NA"],
            filename="records.fasta")
        ```
    """

    # Validation raises explicitly instead of using `assert`, so it is not
    # stripped by `python -O`; AssertionError is kept for existing callers.
    if not all(isolateId.startswith("EPI_ISL_") for isolateId in isolateIds):
        raise AssertionError('isolateId must start with "EPI_ISL_"')

    if downloadType not in ["metadata", "protein", "dna"]:
        raise AssertionError("downloadType must be metadata|protein|dna")

    segmentCheck = [
        "NP",
        "P3",
        "HA",
        "M1",
        "M2",
        "BM2",
        "CM2",
        "M",
        "NA",
        "NB",
        "NS1",
        "NEP",
        "NS2",
        "PA",
        "PA-X",
        "PB1-F2",
        "PB1",
        "HE",
        "PB2",
    ]

    unknownSegments = [segment for segment in segments if segment not in segmentCheck]
    if unknownSegments:
        unknownSegmentStr = ", ".join(unknownSegments)
        raise AssertionError(f"Unknown segment(s): {unknownSegmentStr}")

    logger.debug("Go to result page...")
    # fetch result page id
    cmdPipe = [buildCommand(CompId=cred.browsePage["searchButtonCompId"], cmd="search")]
    body = buildRequestBody(
        cred.sessionId, cred.windowId, cred.browsePage["pid"], cmdPipe
    )
    res = httpPost(cred.url, data=body, headers=cred.headers)
    resultPagePid = re.search(r"sys.goPage\(\'(.+?)\'\)", res.text).group(1)
    cred.resultPage["pid"] = resultPagePid

    # go to result page
    # NOTE(review): the response is not used; presumably the request is needed
    # to move the server-side session to that page - confirm.
    httpGet(
        f"{cred.url}?sid={cred.sessionId}&pid={resultPagePid}", headers=cred.headers
    )

    # select records, get download page id
    # the row id sent to the server is the isolate ID without its "EPI_ISL_" part
    cmdPipe = [
        buildCommand(
            CompId=cred.resultPage["resultCompId"],
            cmd="ChangeValue",
            params={
                "row_id": acc.replace("EPI_ISL_", ""),
                "col_name": "c",
                "value": True,
            },
        )
        for acc in isolateIds
    ]
    cmdPipe += [buildCommand(CompId=cred.resultPage["downloadCompId"], cmd="Download")]

    body = buildRequestBody(
        cred.sessionId, cred.windowId, cred.resultPage["pid"], cmdPipe
    )

    res = httpPost(cred.url, data=body, headers=cred.headers)

    cred.downloadWindowId, cred.downloadPage["pid"] = re.search(
        r"sys.openOverlay\(\'(\w+?)\',\'(\w+?)\'", res.text
    ).group(1, 2)

    logger.debug("Go to download page...")
    # go to download overlay page (response unused, see the note above)
    httpGet(
        f'{cred.url}?sid={cred.sessionId}&pid={cred.downloadPage["pid"]}',
        headers=cred.headers,
    )
    resultDownloadCompId = cred.downloadPage["resultDownloadCompId"]

    logger.debug("Set download params...")
    if downloadType == "metadata":
        # metadata needs no format/segment/header selection
        cmdPipe = [
            buildCommand(CompId=resultDownloadCompId, cmd="download"),
        ]
    else:
        if downloadType == "protein":
            typeCvalue = "proteins"
            downloadSegmentCeid = cred.downloadParamsCeid["proteinSegment"]
            faHeader = "Protein Accession no.|Gene name|Isolate name|Isolate ID|Type@Collection date"
        else:
            typeCvalue = "dna"
            downloadSegmentCeid = cred.downloadParamsCeid["dnaSegment"]
            faHeader = (
                "DNA Accession no.|Segment|Isolate name|Isolate ID|Type@Collection date"
            )

        downloadFormatCeid = cred.downloadParamsCeid["downloadFormat"]
        fastaHeaderCeid = cred.downloadParamsCeid["fastaHeader"]

        cmdPipe = [
            # select protein|dna
            buildCommand(
                CompId=resultDownloadCompId,
                cmd="setTarget",
                params={
                    "cvalue": typeCvalue,
                    "ceid": downloadFormatCeid,
                },
                equiv=f"ST{downloadFormatCeid}",
            ),
            buildCommand(
                CompId=resultDownloadCompId,
                cmd="ChangeValue",
                params={
                    "cvalue": typeCvalue,
                    "ceid": downloadFormatCeid,
                },
                equiv=f"CV{downloadFormatCeid}",
            ),
            buildCommand(
                CompId=resultDownloadCompId,
                cmd="ShowProteins",
                params={"ceid": downloadFormatCeid},
            ),
            # check segment
            buildCommand(
                CompId=resultDownloadCompId,
                cmd="setTarget",
                params={"cvalue": segments, "ceid": downloadSegmentCeid},
                equiv=f"ST{downloadSegmentCeid}",
            ),
            buildCommand(
                CompId=resultDownloadCompId,
                cmd="ChangeValue",
                params={"cvalue": segments, "ceid": downloadSegmentCeid},
                equiv=f"CV{downloadSegmentCeid}",
            ),
            buildCommand(
                CompId=resultDownloadCompId,
                cmd="SelChange",
                params={"ceid": downloadSegmentCeid},
            ),
            # set fasta header
            buildCommand(
                CompId=resultDownloadCompId,
                cmd="setTarget",
                params={"cvalue": faHeader, "ceid": fastaHeaderCeid},
                equiv=f"ST{fastaHeaderCeid}",
            ),
            buildCommand(
                CompId=resultDownloadCompId,
                cmd="ChangeValue",
                params={"cvalue": faHeader, "ceid": fastaHeaderCeid},
                equiv=f"CV{fastaHeaderCeid}",
            ),
            buildCommand(
                CompId=resultDownloadCompId,
                cmd="fillExampleCopied",
                params={"ceid": fastaHeaderCeid},
            ),
            # download
            buildCommand(CompId=resultDownloadCompId, cmd="download"),
        ]

    # both branches submit their pipeline the same way and read the file path
    # from the sys.downloadFile(...) call in the response
    body = buildRequestBody(
        cred.sessionId, cred.downloadWindowId, cred.downloadPage["pid"], cmdPipe
    )

    res = httpPost(cred.url, data=body, headers=cred.headers)

    api = re.search(r"sys\.downloadFile\(\\\"(.+?)\\\"", res.text).group(1)

    # download
    logger.debug("Downloading...")
    if filename is None:
        now = datetime.now().strftime("%Y%m%d-%H%M%S")
        count = len(isolateIds)
        extension = "xls" if downloadType == "metadata" else "fasta"
        filename = f"gisflu-{downloadType}-{count}records-{now}.{extension}"

    # `api` is a path; it is joined to the host name taken from cred.url
    downloadLink = "https://" + urllib.parse.urlparse(cred.url).hostname + api
    res = httpGet(downloadLink, headers=cred.headers)

    with open(filename, "wb") as f:
        f.write(res.content)

    downloadToResultPage(cred)
    resultToBrowsePage(cred)

    return None