2006-10-03 20:59:21 +00:00
|
|
|
<?xml version="1.0"?>
|
|
|
|
<!DOCTYPE sect1 PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
|
|
|
|
"http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd">
|
|
|
|
<sect1 id="condor">
|
|
|
|
<title>CONDOR</title>
|
2006-10-04 13:49:16 +00:00
|
|
|
|
|
|
|
<sect2 id="condor_app">
|
|
|
|
<title>Application in Condor environment</title>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
Master-worker application developed using DC-API can be run in a
|
|
|
|
Condor environment. The master program must be started by hand and
|
|
|
|
it submits workunits to a Condor execution pool.
|
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
All files that are generated by the application including the master and
|
2006-10-19 10:32:49 +00:00
|
|
|
the worker programs and the DC-API library itself are placed under a
|
2006-10-04 13:49:16 +00:00
|
|
|
directory called <emphasis>working directory</emphasis>.
|
|
|
|
</para>
|
|
|
|
|
|
|
|
</sect2>
|
|
|
|
|
|
|
|
<sect2 id="condor_environment">
|
|
|
|
<title>Condor environment</title>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
To execute a DC-API application using the Condor version of the DC-API
|
|
|
|
library you have to set up a Condor environment and have access to
|
|
|
|
it.
|
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
Master program of the application must be started on a Condor submit
|
|
|
|
host so it will be able to submit workunits as Condor jobs.
|
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
Working directory of the application must be accessible by the
|
|
|
|
master and the worker processes too so it should be placed on a
|
|
|
|
shared filesystem (e.g. NFS) which is available for the submit and
|
|
|
|
the execution hosts in the Condor pool.
|
|
|
|
</para>
|
|
|
|
|
|
|
|
</sect2>
|
|
|
|
|
|
|
|
<sect2 id="condor_required">
|
|
|
|
<title>Required tools</title>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
To compile the application using the Condor version of the DC-API
|
|
|
|
library you need an additional library <filename
|
2006-11-23 15:45:08 +00:00
|
|
|
class="libraryfile">libcondorapi.a</filename> which is included in the
|
2006-10-04 13:49:16 +00:00
|
|
|
Condor installation. This library must be linked to the application
|
|
|
|
besides the DC-API library.
|
2006-11-15 11:21:51 +00:00
|
|
|
|
|
|
|
<caution><title>Do not use Condor's lib directory</title>
|
|
|
|
<para>
|
|
|
|
Do not specify Condor's lib directory for the linker when
|
|
|
|
compiling the application. For example do not use the option:
|
|
|
|
|
|
|
|
<example><title>Linker option</title>
|
|
|
|
<programlisting>
|
|
|
|
... -L$CONDOR_HOME/lib ...
|
|
|
|
</programlisting>
|
|
|
|
</example>
|
|
|
|
|
|
|
|
Instead, copy out the <filename
|
2006-11-23 15:45:08 +00:00
|
|
|
class="libraryfile">libcondorapi.a</filename> file to somewhere else
|
2006-11-15 11:21:51 +00:00
|
|
|
and use that directory after the linker's -L option.
|
|
|
|
</para>
|
|
|
|
</caution>
|
|
|
|
|
2006-10-04 13:49:16 +00:00
|
|
|
</para>
|
|
|
|
|
|
|
|
</sect2>
|
|
|
|
|
|
|
|
<sect2 id="condor_configuration">
|
2006-10-03 20:59:21 +00:00
|
|
|
<title>Configuration options</title>
|
|
|
|
|
2006-10-04 13:49:16 +00:00
|
|
|
<variablelist>
|
|
|
|
|
|
|
|
<varlistentry>
|
2006-10-19 10:32:49 +00:00
|
|
|
<term>InstanceUUID</term>
|
2006-10-04 13:49:16 +00:00
|
|
|
<listitem>
|
|
|
|
<para>
|
2006-10-19 10:32:49 +00:00
|
|
|
|
|
|
|
REQUIRED. Identification of running instance of the
|
|
|
|
application. For CONDOR backend it can be any string not
|
|
|
|
just a UUID.
|
|
|
|
|
2006-10-04 13:49:16 +00:00
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
2006-10-19 10:32:49 +00:00
|
|
|
<varlistentry>
|
|
|
|
<term>WorkingDirectory</term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
|
|
|
|
REQUIRED. Name of working directory of the
|
|
|
|
application. All files that are generated by the
|
|
|
|
application or the DC-API library are placed under this
|
|
|
|
directory. Different applications can use the same working
|
|
|
|
directory because every instance has its own subdirectory
|
|
|
|
there.
|
|
|
|
|
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
|
|
|
<varlistentry>
|
|
|
|
<term>ClientMessageBox</term>
|
2006-10-04 13:49:16 +00:00
|
|
|
<listitem>
|
|
|
|
<para>
|
2006-10-19 10:32:49 +00:00
|
|
|
|
|
|
|
Name of the directory in workunit's working directory
|
|
|
|
where messages are placed which are sent by the client to
|
|
|
|
the master by <function><link
|
2006-10-19 11:16:27 +00:00
|
|
|
linkend="DC-sendMessage">DC_sendMessage()</link></function>. Default
|
2006-10-19 10:32:49 +00:00
|
|
|
value is <filename>_dcapi_client_messages</filename>.
|
|
|
|
|
2006-10-04 13:49:16 +00:00
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
|
|
|
<varlistentry>
|
2006-10-19 10:32:49 +00:00
|
|
|
<term>MasterMessageBox</term>
|
2006-10-04 13:49:16 +00:00
|
|
|
<listitem>
|
|
|
|
<para>
|
2006-10-19 10:32:49 +00:00
|
|
|
|
|
|
|
Name of the directory in workunit's working directory
|
|
|
|
where <function><link
|
2006-10-19 11:16:27 +00:00
|
|
|
linkend="DC-sendWUMessage">DC_sendWUMessage()</link></function>
|
2007-03-12 21:00:09 +00:00
|
|
|
places messages sent by the master to the client. Default
|
2006-10-19 10:32:49 +00:00
|
|
|
value is <filename>_dcapi_master_messages</filename>.
|
|
|
|
|
2006-10-04 13:49:16 +00:00
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
|
|
|
<varlistentry>
|
2006-10-19 10:32:49 +00:00
|
|
|
<term>SubresultBox</term>
|
2006-10-04 13:49:16 +00:00
|
|
|
<listitem>
|
|
|
|
<para>
|
2006-10-19 10:32:49 +00:00
|
|
|
|
|
|
|
Name of the directory in workunit's working directory
|
|
|
|
where <function><link
|
2006-10-19 11:16:27 +00:00
|
|
|
linkend="DC-sendResult">DC_sendResult()</link></function>
|
2006-10-19 10:32:49 +00:00
|
|
|
places subresults generated by the client. Default
|
|
|
|
value is <filename>_dcapi_client_subresults</filename>.
|
|
|
|
|
2006-10-04 13:49:16 +00:00
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
|
|
|
<varlistentry>
|
2006-10-19 10:32:49 +00:00
|
|
|
<term>SystemMessageBox</term>
|
2006-10-04 13:49:16 +00:00
|
|
|
<listitem>
|
|
|
|
<para>
|
2006-10-19 10:32:49 +00:00
|
|
|
|
|
|
|
Name of the directory in workunit's working directory
|
|
|
|
where the master and client program place management
|
|
|
|
messages for example when the master asks the client to
|
|
|
|
suspend and it sends back an acknowledgement. Default value is
|
|
|
|
<filename>_dcapi_system_messages</filename>.
|
|
|
|
|
2006-10-04 13:49:16 +00:00
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
|
|
|
<varlistentry>
|
|
|
|
<term>SubmitFile</term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
2006-10-19 10:32:49 +00:00
|
|
|
|
|
|
|
Name of the file in workunit's working directory which is
|
|
|
|
generated by the master and used as submit information for
|
|
|
|
Condor when a workunit is prepared to start. Default value
|
|
|
|
is <filename>_dcapi_condor_submit.txt</filename>.
|
|
|
|
|
2006-10-04 13:49:16 +00:00
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
|
|
|
<varlistentry>
|
|
|
|
<term>Executable</term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
2006-10-19 10:32:49 +00:00
|
|
|
|
|
|
|
Name of the executable file of the client (workunit). By
|
|
|
|
default it is the <parameter>clientName</parameter>
|
|
|
|
parameter which was passed to <function><link
|
2006-10-19 11:16:27 +00:00
|
|
|
linkend="DC-createWU">DC_createWU()</link></function>.
|
2006-10-19 10:32:49 +00:00
|
|
|
|
2006-10-04 13:49:16 +00:00
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
|
|
|
<varlistentry>
|
|
|
|
<term>LeaveFiles</term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
2006-10-19 10:32:49 +00:00
|
|
|
|
|
|
|
Specifies if files, directories generated in workunit's
|
|
|
|
working directory should be deleted or not after workunit
|
|
|
|
ends. Zero value means delete and non-zero value means not
|
|
|
|
to delete. Default value is 0.
|
|
|
|
|
2006-10-04 13:49:16 +00:00
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
|
|
|
<varlistentry>
|
|
|
|
<term>CondorLog</term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
2006-10-19 10:32:49 +00:00
|
|
|
|
|
|
|
Name of the file in workunit's working directory where
|
|
|
|
Condor writes records about events that happen to the Condor
|
|
|
|
job. Default value is
|
|
|
|
<filename>_dcapi_internal_log.txt</filename>.
|
|
|
|
|
2006-10-04 13:49:16 +00:00
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
|
|
|
<varlistentry>
|
|
|
|
<term>CheckpointFile</term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
2006-10-19 10:32:49 +00:00
|
|
|
|
|
|
|
Name of file in workunit's working directory where
|
|
|
|
checkpoint information is written by the
|
|
|
|
client. <function><link
|
2006-10-19 11:16:27 +00:00
|
|
|
linkend="DC-resolveFileName">DC_resolveFileName()</link></function>
|
2006-10-19 10:32:49 +00:00
|
|
|
will resolve <link
|
2006-10-19 11:24:08 +00:00
|
|
|
linkend="DC-CHECKPOINT-FILE:CAPS">DC_CHECKPOINT_FILE</link>
|
2006-10-19 10:32:49 +00:00
|
|
|
to this name. Default value is
|
|
|
|
<filename>_dcapi_checkpoint</filename>.
|
|
|
|
|
2006-10-04 13:49:16 +00:00
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
|
|
|
<varlistentry>
|
|
|
|
<term>SavedOutputs</term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
2006-10-19 10:32:49 +00:00
|
|
|
|
|
|
|
Name of directory in workunit's working directory where
|
|
|
|
workunit's standard output is saved when it is
|
|
|
|
suspended. Default value is
|
|
|
|
<filename>_dcapi_saved_output</filename>. There is no
|
|
|
|
facility in the DC-API yet to merge saved output together.
|
|
|
|
|
2006-10-04 13:49:16 +00:00
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
2006-11-20 12:46:00 +00:00
|
|
|
<varlistentry>
|
|
|
|
<term>CondorSubmitTemplate</term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
|
|
|
|
Name of the file which is used as template when generating
|
|
|
|
Condor submit file. If not specified then a built-in template
|
|
|
|
will be used. % character can be used to include variable data
|
|
|
|
into the generated file. Recognized % instructions:
|
|
|
|
|
|
|
|
<variablelist>
|
|
|
|
<varlistentry>
|
|
|
|
<term>%%</term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
Literal %.
|
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
|
|
|
<varlistentry>
|
|
|
|
<term>%d</term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
Current date and time.
|
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
|
|
|
<varlistentry>
|
|
|
|
<term>%n</term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
Name of the workunit.
|
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
|
|
|
<varlistentry>
|
|
|
|
<term>%i</term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
Internal ID of the workunit.
|
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
|
|
|
<varlistentry>
|
|
|
|
<term>%w</term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
Name of working directory of the workunit.
|
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
|
|
|
<varlistentry>
|
|
|
|
<term>%c</term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
Client name.
|
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
|
|
|
<varlistentry>
|
|
|
|
<term>%r</term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
Number of the arguments.
|
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
|
|
|
<varlistentry>
|
|
|
|
<term>%x</term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
Name of the executable.
|
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
|
|
|
<varlistentry>
|
|
|
|
<term>%a</term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
Argument list.
|
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
|
|
|
<varlistentry>
|
|
|
|
<term>%u</term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
Condor universe (always vanilla).
|
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
|
|
|
<varlistentry>
|
|
|
|
<term>%o</term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
File for standard output of the job.
|
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
|
|
|
<varlistentry>
|
|
|
|
<term>%e</term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
File for standard error of the job.
|
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
|
|
|
<varlistentry>
|
|
|
|
<term>%l</term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
File for Condor user log.
|
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
2010-04-21 11:57:28 +00:00
|
|
|
|
|
|
|
<varlistentry>
|
|
|
|
<term>%I</term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
2010-04-21 11:58:10 +00:00
|
|
|
Comma separated list of input files (physical filenames with path). Capital 'i'.
|
2010-04-21 11:57:28 +00:00
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
|
|
|
<varlistentry>
|
|
|
|
<term>%O</term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
2010-04-21 11:58:10 +00:00
|
|
|
Comma separated list of output files.
|
2010-04-21 11:57:28 +00:00
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
2010-05-07 12:59:20 +00:00
|
|
|
<varlistentry>
|
|
|
|
<term>SubmitRetry</term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
If a job cannot be submitted, how many times should DC-API try before giving
|
|
|
|
up and reporting it as failed. Default value is 5.
|
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
<varlistentry>
|
|
|
|
<term>SubmitRetrySleepTime</term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
Defines the start value for the sleep period between job submission retries.
|
|
|
|
Default value is 2. It is multiplied by 2 after each retry, so 2 seconds sleep
|
|
|
|
before the first retry, 4 seconds before the second, 8 seconds before the third
|
|
|
|
and so on.
|
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
2006-11-20 12:46:00 +00:00
|
|
|
</variablelist>
|
|
|
|
|
2010-05-07 12:59:20 +00:00
|
|
|
|
|
|
|
|
2006-11-20 12:46:00 +00:00
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
|
|
|
|
</varlistentry>
|
|
|
|
|
2006-10-04 13:49:16 +00:00
|
|
|
</variablelist>
|
|
|
|
|
2006-10-03 20:59:21 +00:00
|
|
|
</sect2>
|
|
|
|
</sect1>
|
|
|
|
|
|
|
|
<!-- End of condor.xml -->
|